[DOC] Use RDoc inclusions in string.c (#5683)

As @peterzhu2118 and @duerst have pointed out, putting a String method's RDoc into doc/ (which allows non-ASCII in examples) makes the "click to toggle source" feature not work for that method. This PR moves the primary method doc back into string.c, then includes RDoc from doc/string/*.rdoc, and also removes doc/string.rdoc. The affected methods are: ::new, #bytes, #each_byte, #each_line, #split. The call-seq is in string.c because it works there; it did not work when the call-seq was in doc/string/*.rdoc. This PR also updates the relevant guidance in doc/documentation_guide.rdoc.
Merged-By: BurdetteLamar <BurdetteLamar@Yahoo.com>
2022-11-09 12:17:21 -05:00 · 2022-03-21 14:58:00 -05:00 · 2022-03-21 14:58:00 -05:00 · c129b6119d · 2022-03-22 04:58:22 +09:00
commit c129b6119d
parent 1fd1f7bbfc
8 changed files with 261 additions and 274 deletions
--- a/doc/documentation_guide.rdoc
+++ b/doc/documentation_guide.rdoc
@ -60,34 +60,25 @@ involving new files <tt>doc/*.rdoc</tt>:
    # Documentation for module Bar goes here.
    module Bar; end

- For an instance method Baz#bat (defined in file <tt>baz.c</tt>),
-  create file <tt>doc/baz.rdoc</tt>, declare class +Baz+
-  and instance method +bat+, and place the method documentation above
-  the method declaration:
+- For a method, things are different.
+  Documenting a method as above disables the "click to toggle source" feature
+  in the rendered documentation.

-    # :markup: ruby
-    class Baz
-      # Documentation for method bat goes here.
-      # (Don't forget the call-seq.)
-      def bat; end
-    end
+  Therefore it's best to use file inclusion:

- For a singleton method Bam.bah (defined in file <tt>bam.c</tt>),
-  create file <tt>doc/bam.rdoc</tt>, declare class +Bam+
-  and singleton method +bah+, and place the method documentation above
-  the method declaration:
+  - Retain the call-seq in the C code.
+  - Use file inclusion (+:include:+) to include text from an .rdoc file.

-    # :markup: ruby
-    class Bam
-      # Documentation for method bah goes here.
-      # (Don't forget the call-seq.)
-      def self.bah; end
-    end
+  Example:

-  See these examples:
-
-  - https://raw.githubusercontent.com/ruby/ruby/master/doc/string.rdoc
-  - https://raw.githubusercontent.com/ruby/ruby/master/doc/transcode.rdoc
+    /*
+     *  call-seq:
+     *    each_byte {|byte| ... } -> self
+     *    each_byte               -> enumerator
+     *
+     *  \:include: doc/string/each_byte.rdoc
+     *
+     */

 === \RDoc

--- a/doc/string.rdoc
+++ b/doc/string.rdoc
@ -1,245 +0,0 @@
-# :markup: ruby
-
-class String
-  # call-seq:
-  #   String.new(string = '') -> new_string
-  #   String.new(string = '', encoding: encoding) -> new_string
-  #   String.new(string = '', capacity: size) -> new_string
-  #
-  # Returns a new \String that is a copy of +string+.
-  #
-  # With no arguments, returns the empty string with the Encoding <tt>ASCII-8BIT</tt>:
-  #   s = String.new
-  #   s # => ""
-  #   s.encoding # => #<Encoding:ASCII-8BIT>
-  #
-  # With the single \String argument +string+, returns a copy of +string+
-  # with the same encoding as +string+:
-  #   s = String.new('Que veut dire ça?')
-  #   s # => "Que veut dire ça?"
-  #   s.encoding # => #<Encoding:UTF-8>
-  #
-  # Literal strings like <tt>""</tt> or here-documents always use
-  # Encoding@Script+encoding, unlike String.new.
-  #
-  # With keyword +encoding+, returns a copy of +str+
-  # with the specified encoding:
-  #   s = String.new(encoding: 'ASCII')
-  #   s.encoding # => #<Encoding:US-ASCII>
-  #   s = String.new('foo', encoding: 'ASCII')
-  #   s.encoding # => #<Encoding:US-ASCII>
-  #
-  # Note that these are equivalent:
-  #   s0 = String.new('foo', encoding: 'ASCII')
-  #   s1 = 'foo'.force_encoding('ASCII')
-  #   s0.encoding == s1.encoding # => true
-  #
-  # With keyword +capacity+, returns a copy of +str+;
-  # the given +capacity+ may set the size of the internal buffer,
-  # which may affect performance:
-  #   String.new(capacity: 1) # => ""
-  #   String.new(capacity: 4096) # => ""
-  #
-  # The +string+, +encoding+, and +capacity+ arguments may all be used together:
-  #
-  #   String.new('hello', encoding: 'UTF-8', capacity: 25)
-  #
-  def initialize(str = '', encoding: nil, capacity: nil)
-    Primitive.rb_str_init(str, encoding, capacity)
-  end
-
-  #  call-seq:
-  #    bytes -> array_of_bytes
-  #
-  #  Returns an array of the bytes in +self+:
-  #
-  #    'hello'.bytes # => [104, 101, 108, 108, 111]
-  #    'тест'.bytes  # => [209, 130, 208, 181, 209, 129, 209, 130]
-  #    'こんにちは'.bytes
-  #    # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175]
-  #
-  def bytes; end
-
-  #  call-seq:
-  #    each_byte {|byte| ... } -> self
-  #    each_byte               -> enumerator
-  #
-  #  Calls the given block with each successive byte from +self+;
-  #  returns +self+:
-  #
-  #    'hello'.each_byte {|byte| print byte, ' ' }
-  #    print "\n"
-  #    'тест'.each_byte {|byte| print byte, ' ' }
-  #    print "\n"
-  #    'こんにちは'.each_byte {|byte| print byte, ' ' }
-  #    print "\n"
-  #
-  #  Output:
-  #
-  #    104 101 108 108 111
-  #    209 130 208 181 209 129 209 130
-  #    227 129 147 227 130 147 227 129 171 227 129 161 227 129 175
-  #
-  #  Returns an enumerator if no block is given.
-  def each_byte; end
-
-
-  #  call-seq:
-  #    each_line(line_sep = $/, chomp: false) {|substring| ... } -> self
-  #    each_line(line_sep = $/, chomp: false)                    -> enumerator
-  #
-  #  With a block given, forms the substrings ("lines")
-  #  that are the result of splitting +self+
-  #  at each occurrence of the given line separator +line_sep+;
-  #  passes each line to the block;
-  #  returns +self+:
-  #
-  #    s = <<~EOT
-  #    This is the first line.
-  #    This is line two.
-  #
-  #    This is line four.
-  #    This is line five.
-  #    EOT
-  #
-  #    s.each_line {|line| p line }
-  #
-  #  Output:
-  #
-  #    "This is the first line.\n"
-  #    "This is line two.\n"
-  #    "\n"
-  #    "This is line four.\n"
-  #    "This is line five.\n"
-  #
-  #  With a different +line_sep+:
-  #
-  #    s.each_line(' is ') {|line| p line }
-  #
-  #  Output:
-  #
-  #    "This is "
-  #    "the first line.\nThis is "
-  #    "line two.\n\nThis is "
-  #    "line four.\nThis is "
-  #    "line five.\n"
-  #
-  #  With +chomp+ as +true+, removes the trailing +line_sep+ from each line:
-  #
-  #    s.each_line(chomp: true) {|line| p line }
-  #
-  #  Output:
-  #
-  #    "This is the first line."
-  #    "This is line two."
-  #    ""
-  #    "This is line four."
-  #    "This is line five."
-  #
-  #  With an empty string as +line_sep+,
-  #  forms and passes "paragraphs" by splitting at each occurrence
-  #  of two or more newlines:
-  #
-  #    s.each_line('') {|line| p line }
-  #
-  #  Output:
-  #
-  #    "This is the first line.\nThis is line two.\n\n"
-  #    "This is line four.\nThis is line five.\n"
-  #
-  #  With no block given, returns an enumerator.
-  #
-  def each_line; end
-
-  #  call-seq:
-  #    split(field_sep = $;, limit = nil) -> array
-  #    split(field_sep = $;, limit = nil) {|substring| ... } -> self
-  #
-  #  Returns an array of substrings of +self+
-  #  that are the result of splitting +self+
-  #  at each occurrence of the given field separator +field_sep+.
-  #
-  #  When +field_sep+ is <tt>$;</tt>:
-  #
-  #  - If <tt>$;</tt> is +nil+ (its default value),
-  #    the split occurs just as if +field_sep+ were given as a space character
-  #    (see below).
-  #
-  #  - If <tt>$;</tt> is a string,
-  #    the split ocurs just as if +field_sep+ were given as that string
-  #    (see below).
-  #
-  #  When +field_sep+ is <tt>' '</tt> and +limit+ is +nil+,
-  #  the split occurs at each sequence of whitespace:
-  #
-  #    'abc def ghi'.split(' ')          # => ["abc", "def", "ghi"]
-  #    "abc \n\tdef\t\n  ghi".split(' ') # => ["abc", "def", "ghi"]
-  #    'abc  def   ghi'.split(' ')       # => ["abc", "def", "ghi"]
-  #    ''.split(' ')                     # => []
-  #
-  #  When +field_sep+ is a string different from <tt>' '</tt>
-  #  and +limit+ is +nil+,
-  #  the split occurs at each occurrence of +field_sep+;
-  #  trailing empty substrings are not returned:
-  #
-  #    'abracadabra'.split('ab')   # => ["", "racad", "ra"]
-  #    'aaabcdaaa'.split('a')      # => ["", "", "", "bcd"]
-  #    ''.split('a')               # => []
-  #    '3.14159'.split('1')        # => ["3.", "4", "59"]
-  #    '!@#$%^$&*($)_+'.split('$') # => ["!@#", "%^", "&*(", ")_+"]
-  #    'тест'.split('т')           # => ["", "ес"]
-  #    'こんにちは'.split('に')      # => ["こん", "ちは"]
-  #
-  #  When +field_sep+ is a Regexp and +limit+ is +nil+,
-  #  the split occurs at each occurrence of a match;
-  #  trailing empty substrings are not returned:
-  #
-  #    'abracadabra'.split(/ab/) # => ["", "racad", "ra"]
-  #    'aaabcdaaa'.split(/a/)    # => ["", "", "", "bcd"]
-  #    'aaabcdaaa'.split(//)     # => ["a", "a", "a", "b", "c", "d", "a", "a", "a"]
-  #    '1 + 1 == 2'.split(/\W+/) # => ["1", "1", "2"]
-  #
-  #  If the \Regexp contains groups, their matches are also included
-  #  in the returned array:
-  #
-  #    '1:2:3'.split(/(:)()()/, 2) # => ["1", ":", "", "", "2:3"]
-  #
-  #  As seen above, if +limit+ is +nil+,
-  #  trailing empty substrings are not returned;
-  #  the same is true if +limit+ is zero:
-  #
-  #    'aaabcdaaa'.split('a')    # => ["", "", "", "bcd"]
-  #    'aaabcdaaa'.split('a', 0) # => ["", "", "", "bcd"]
-  #
-  #  If +limit+ is positive integer +n+, no more than <tt>n - 1-</tt>
-  #  splits occur, so that at most +n+ substrings are returned,
-  #  and trailing empty substrings are included:
-  #
-  #    'aaabcdaaa'.split('a', 1) # => ["aaabcdaaa"]
-  #    'aaabcdaaa'.split('a', 2) # => ["", "aabcdaaa"]
-  #    'aaabcdaaa'.split('a', 5) # => ["", "", "", "bcd", "aa"]
-  #    'aaabcdaaa'.split('a', 7) # => ["", "", "", "bcd", "", "", ""]
-  #    'aaabcdaaa'.split('a', 8) # => ["", "", "", "bcd", "", "", ""]
-  #
-  #  Note that if +field_sep+ is a \Regexp containing groups,
-  #  their matches are in the returned array, but do not count toward the limit.
-  #
-  #  If +limit+ is negative, it behaves the same as if +limit+ was +nil+,
-  #  meaning that there is no limit,
-  #  and trailing empty substrings are included:
-  #
-  #    'aaabcdaaa'.split('a', -1) # => ["", "", "", "bcd", "", "", ""]
-  #
-  #  If a block is given, it is called with each substring:
-  #
-  #    'abc def ghi'.split(' ') {|substring| p substring }
-  #
-  #  Output:
-  #
-  #    "abc"
-  #    "def"
-  #    "ghi"
-  #
-  def split; end
-
-end
--- a/doc/string/bytes.rdoc
+++ b/doc/string/bytes.rdoc
@ -0,0 +1,6 @@
+Returns an array of the bytes in +self+:
+
+  'hello'.bytes # => [104, 101, 108, 108, 111]
+  'тест'.bytes  # => [209, 130, 208, 181, 209, 129, 209, 130]
+  'こんにちは'.bytes
+  # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175]
--- a/doc/string/each_byte.rdoc
+++ b/doc/string/each_byte.rdoc
@ -0,0 +1,17 @@
+Calls the given block with each successive byte from +self+;
+returns +self+:
+
+  'hello'.each_byte {|byte| print byte, ' ' }
+  print "\n"
+  'тест'.each_byte {|byte| print byte, ' ' }
+  print "\n"
+  'こんにちは'.each_byte {|byte| print byte, ' ' }
+  print "\n"
+
+Output:
+
+  104 101 108 108 111
+  209 130 208 181 209 129 209 130
+  227 129 147 227 130 147 227 129 171 227 129 161 227 129 175
+
+Returns an enumerator if no block is given.
--- a/doc/string/each_line.rdoc
+++ b/doc/string/each_line.rdoc
@ -0,0 +1,60 @@
+With a block given, forms the substrings ("lines")
+that are the result of splitting +self+
+at each occurrence of the given line separator +line_sep+;
+passes each line to the block;
+returns +self+:
+
+  s = <<~EOT
+  This is the first line.
+  This is line two.
+
+  This is line four.
+  This is line five.
+  EOT
+
+  s.each_line {|line| p line }
+
+Output:
+
+  "This is the first line.\n"
+  "This is line two.\n"
+  "\n"
+  "This is line four.\n"
+  "This is line five.\n"
+
+With a different +line_sep+:
+
+  s.each_line(' is ') {|line| p line }
+
+Output:
+
+  "This is "
+  "the first line.\nThis is "
+  "line two.\n\nThis is "
+  "line four.\nThis is "
+  "line five.\n"
+
+With +chomp+ as +true+, removes the trailing +line_sep+ from each line:
+
+  s.each_line(chomp: true) {|line| p line }
+
+Output:
+
+  "This is the first line."
+  "This is line two."
+  ""
+  "This is line four."
+  "This is line five."
+
+With an empty string as +line_sep+,
+forms and passes "paragraphs" by splitting at each occurrence
+of two or more newlines:
+
+  s.each_line('') {|line| p line }
+
+Output:
+
+  "This is the first line.\nThis is line two.\n\n"
+  "This is line four.\nThis is line five.\n"
+
+With no block given, returns an enumerator.
--- a/doc/string/new.rdoc
+++ b/doc/string/new.rdoc
@ -0,0 +1,37 @@
+Returns a new \String that is a copy of +string+.
+
+With no arguments, returns the empty string with the Encoding <tt>ASCII-8BIT</tt>:
+  s = String.new
+  s # => ""
+  s.encoding # => #<Encoding:ASCII-8BIT>
+
+With the single \String argument +string+, returns a copy of +string+
+with the same encoding as +string+:
+  s = String.new('Que veut dire ça?')
+  s # => "Que veut dire ça?"
+  s.encoding # => #<Encoding:UTF-8>
+
+Literal strings like <tt>""</tt> or here-documents always use
+Encoding@Script+encoding, unlike String.new.
+
+With keyword +encoding+, returns a copy of +string+
+with the specified encoding:
+  s = String.new(encoding: 'ASCII')
+  s.encoding # => #<Encoding:US-ASCII>
+  s = String.new('foo', encoding: 'ASCII')
+  s.encoding # => #<Encoding:US-ASCII>
+
+Note that these are equivalent:
+  s0 = String.new('foo', encoding: 'ASCII')
+  s1 = 'foo'.force_encoding('ASCII')
+  s0.encoding == s1.encoding # => true
+
+With keyword +capacity+, returns a copy of +string+;
+the given +capacity+ may set the size of the internal buffer,
+which may affect performance:
+  String.new(capacity: 1) # => ""
+  String.new(capacity: 4096) # => ""
+
+The +string+, +encoding+, and +capacity+ arguments may all be used together:
+
+  String.new('hello', encoding: 'UTF-8', capacity: 25)
--- a/doc/string/split.rdoc
+++ b/doc/string/split.rdoc
@ -0,0 +1,84 @@
+Returns an array of substrings of +self+
+that are the result of splitting +self+
+at each occurrence of the given field separator +field_sep+.
+
+When +field_sep+ is <tt>$;</tt>:
+
+- If <tt>$;</tt> is +nil+ (its default value),
+  the split occurs just as if +field_sep+ were given as a space character
+  (see below).
+
+- If <tt>$;</tt> is a string,
+  the split occurs just as if +field_sep+ were given as that string
+  (see below).
+
+When +field_sep+ is <tt>' '</tt> and +limit+ is +nil+,
+the split occurs at each sequence of whitespace:
+
+  'abc def ghi'.split(' ')          # => ["abc", "def", "ghi"]
+  "abc \n\tdef\t\n  ghi".split(' ') # => ["abc", "def", "ghi"]
+  'abc  def   ghi'.split(' ')       # => ["abc", "def", "ghi"]
+  ''.split(' ')                     # => []
+
+When +field_sep+ is a string different from <tt>' '</tt>
+and +limit+ is +nil+,
+the split occurs at each occurrence of +field_sep+;
+trailing empty substrings are not returned:
+
+  'abracadabra'.split('ab')   # => ["", "racad", "ra"]
+  'aaabcdaaa'.split('a')      # => ["", "", "", "bcd"]
+  ''.split('a')               # => []
+  '3.14159'.split('1')        # => ["3.", "4", "59"]
+  '!@#$%^$&*($)_+'.split('$') # => ["!@#", "%^", "&*(", ")_+"]
+  'тест'.split('т')           # => ["", "ес"]
+  'こんにちは'.split('に')      # => ["こん", "ちは"]
+
+When +field_sep+ is a Regexp and +limit+ is +nil+,
+the split occurs at each occurrence of a match;
+trailing empty substrings are not returned:
+
+  'abracadabra'.split(/ab/) # => ["", "racad", "ra"]
+  'aaabcdaaa'.split(/a/)    # => ["", "", "", "bcd"]
+  'aaabcdaaa'.split(//)     # => ["a", "a", "a", "b", "c", "d", "a", "a", "a"]
+  '1 + 1 == 2'.split(/\W+/) # => ["1", "1", "2"]
+
+If the \Regexp contains groups, their matches are also included
+in the returned array:
+
+  '1:2:3'.split(/(:)()()/, 2) # => ["1", ":", "", "", "2:3"]
+
+As seen above, if +limit+ is +nil+,
+trailing empty substrings are not returned;
+the same is true if +limit+ is zero:
+
+  'aaabcdaaa'.split('a')    # => ["", "", "", "bcd"]
+  'aaabcdaaa'.split('a', 0) # => ["", "", "", "bcd"]
+
+If +limit+ is a positive integer +n+, no more than <tt>n - 1</tt>
+splits occur, so that at most +n+ substrings are returned,
+and trailing empty substrings are included:
+
+  'aaabcdaaa'.split('a', 1) # => ["aaabcdaaa"]
+  'aaabcdaaa'.split('a', 2) # => ["", "aabcdaaa"]
+  'aaabcdaaa'.split('a', 5) # => ["", "", "", "bcd", "aa"]
+  'aaabcdaaa'.split('a', 7) # => ["", "", "", "bcd", "", "", ""]
+  'aaabcdaaa'.split('a', 8) # => ["", "", "", "bcd", "", "", ""]
+
+Note that if +field_sep+ is a \Regexp containing groups,
+their matches are in the returned array, but do not count toward the limit.
+
+If +limit+ is negative, there is no limit on the number of splits
+(just as when +limit+ is +nil+), but, unlike that case,
+trailing empty substrings are included:
+
+  'aaabcdaaa'.split('a', -1) # => ["", "", "", "bcd", "", "", ""]
+
+If a block is given, it is called with each substring:
+
+  'abc def ghi'.split(' ') {|substring| p substring }
+
+Output:
+
+  "abc"
+  "def"
+  "ghi"
--- a/string.c
+++ b/string.c
@ -1810,7 +1810,17 @@ rb_ec_str_resurrect(struct rb_execution_context_struct *ec, VALUE str)
    return ec_str_duplicate(ec, rb_cString, str);
 }

-/* :nodoc: documented in doc/string.rdoc */
+/*
+ *
+ *  call-seq:
+ *    String.new(string = '') -> new_string
+ *    String.new(string = '', encoding: encoding) -> new_string
+ *    String.new(string = '', capacity: size) -> new_string
+ *
+ *  :include: doc/string/new.rdoc
+ *
+ */
+
 static VALUE
 rb_str_init(int argc, VALUE *argv, VALUE str)
 {
@ -8652,7 +8662,14 @@ literal_split_pattern(VALUE spat, split_type_t default_type)
    return default_type;
 }

-// String#split is documented at doc/string.rdoc.
+/*
+ *  call-seq:
+ *    split(field_sep = $;, limit = nil) -> array
+ *    split(field_sep = $;, limit = nil) {|substring| ... } -> self
+ *
+ *  :include: doc/string/split.rdoc
+ *
+ */

 static VALUE
 rb_str_split_m(int argc, VALUE *argv, VALUE str)
@ -9063,7 +9080,14 @@ rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, VALUE ary)
 	return orig;
 }

-// String#each_line is documented at doc/string.rdoc.
+/*
+ *  call-seq:
+ *    each_line(line_sep = $/, chomp: false) {|substring| ... } -> self
+ *    each_line(line_sep = $/, chomp: false)                    -> enumerator
+ *
+ *  :include: doc/string/each_line.rdoc
+ *
+ */

 static VALUE
 rb_str_each_line(int argc, VALUE *argv, VALUE str)
@ -9108,7 +9132,14 @@ rb_str_enumerate_bytes(VALUE str, VALUE ary)
 	return str;
 }

-// String#each_byte is documented in doc/string.rdoc.
+/*
+ *  call-seq:
+ *    each_byte {|byte| ... } -> self
+ *    each_byte               -> enumerator
+ *
+ *  :include: doc/string/each_byte.rdoc
+ *
+ */

 static VALUE
 rb_str_each_byte(VALUE str)
@ -9117,7 +9148,13 @@ rb_str_each_byte(VALUE str)
    return rb_str_enumerate_bytes(str, 0);
 }

-// String#bytes is documented in doc/string.rdoc.
+/*
+ *  call-seq:
+ *    bytes -> array_of_bytes
+ *
+ *  :include: doc/string/bytes.rdoc
+ *
+ */

 static VALUE
 rb_str_bytes(VALUE str)