1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

[DOC] Use RDoc inclusions in string.c (#5683)

As @peterzhu2118 and @duerst have pointed out, putting string method's RDoc into doc/ (which allows non-ASCII in examples) makes the "click to toggle source" feature not work for that method.

This PR moves the primary method doc back into string.c, then includes RDoc from doc/string/*.rdoc, and also removes doc/string.rdoc.

The affected methods are:

    ::new
    #bytes
    #each_byte
    #each_line
    #split

The call-seq is in string.c because it works there; it did not work when the call-seq is in doc/string/*.rdoc.

This PR also updates the relevant guidance in doc/documentation_guide.rdoc.
This commit is contained in:
Burdette Lamar 2022-03-21 14:58:00 -05:00 committed by GitHub
parent 1fd1f7bbfc
commit c129b6119d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
Notes: git 2022-03-22 04:58:22 +09:00
Merged-By: BurdetteLamar <BurdetteLamar@Yahoo.com>
8 changed files with 261 additions and 274 deletions

View file

@ -60,34 +60,25 @@ involving new files <tt>doc/*.rdoc</tt>:
# Documentation for module Bar goes here.
module Bar; end
- For an instance method Baz#bat (defined in file <tt>baz.c</tt>),
create file <tt>doc/baz.rdoc</tt>, declare class +Baz+
and instance method +bat+, and place the method documentation above
the method declaration:
- For a method, things are different.
Documenting a method as above disables the "click to toggle source" feature
in the rendered documentation.
# :markup: ruby
class Baz
# Documentation for method bat goes here.
# (Don't forget the call-seq.)
def bat; end
end
Therefore it's best to use file inclusion:
- For a singleton method Bam.bah (defined in file <tt>bam.c</tt>),
create file <tt>doc/bam.rdoc</tt>, declare class +Bam+
and singleton method +bah+, and place the method documentation above
the method declaration:
- Retain the call-seq in the C code.
- Use file inclusion (+:include:+) to include text from an .rdoc file.
# :markup: ruby
class Bam
# Documentation for method bah goes here.
# (Don't forget the call-seq.)
def self.bah; end
end
Example:
See these examples:
- https://raw.githubusercontent.com/ruby/ruby/master/doc/string.rdoc
- https://raw.githubusercontent.com/ruby/ruby/master/doc/transcode.rdoc
/*
* call-seq:
* each_byte {|byte| ... } -> self
* each_byte -> enumerator
*
* \:include: doc/string/each_byte.rdoc
*
*/
=== \RDoc

View file

@ -1,245 +0,0 @@
# :markup: ruby
class String
# call-seq:
# String.new(string = '') -> new_string
# String.new(string = '', encoding: encoding) -> new_string
# String.new(string = '', capacity: size) -> new_string
#
# Returns a new \String that is a copy of +string+.
#
# With no arguments, returns the empty string with the Encoding <tt>ASCII-8BIT</tt>:
# s = String.new
# s # => ""
# s.encoding # => #<Encoding:ASCII-8BIT>
#
# With the single \String argument +string+, returns a copy of +string+
# with the same encoding as +string+:
# s = String.new('Que veut dire ça?')
# s # => "Que veut dire ça?"
# s.encoding # => #<Encoding:UTF-8>
#
# Literal strings like <tt>""</tt> or here-documents always use
# Encoding@Script+encoding, unlike String.new.
#
# With keyword +encoding+, returns a copy of +string+
# with the specified encoding:
# s = String.new(encoding: 'ASCII')
# s.encoding # => #<Encoding:US-ASCII>
# s = String.new('foo', encoding: 'ASCII')
# s.encoding # => #<Encoding:US-ASCII>
#
# Note that these are equivalent:
# s0 = String.new('foo', encoding: 'ASCII')
# s1 = 'foo'.force_encoding('ASCII')
# s0.encoding == s1.encoding # => true
#
# With keyword +capacity+, returns a copy of +string+;
# the given +capacity+ may set the size of the internal buffer,
# which may affect performance:
# String.new(capacity: 1) # => ""
# String.new(capacity: 4096) # => ""
#
# The +string+, +encoding+, and +capacity+ arguments may all be used together:
#
# String.new('hello', encoding: 'UTF-8', capacity: 25)
#
def initialize(str = '', encoding: nil, capacity: nil)
Primitive.rb_str_init(str, encoding, capacity)
end
# call-seq:
# bytes -> array_of_bytes
#
# Returns an array of the bytes in +self+:
#
# 'hello'.bytes # => [104, 101, 108, 108, 111]
# 'тест'.bytes # => [209, 130, 208, 181, 209, 129, 209, 130]
# 'こんにちは'.bytes
# # => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175]
#
def bytes; end # declaration only, so RDoc has a method to attach the comment to
# call-seq:
# each_byte {|byte| ... } -> self
# each_byte -> enumerator
#
# Calls the given block with each successive byte from +self+;
# returns +self+:
#
# 'hello'.each_byte {|byte| print byte, ' ' }
# print "\n"
# 'тест'.each_byte {|byte| print byte, ' ' }
# print "\n"
# 'こんにちは'.each_byte {|byte| print byte, ' ' }
# print "\n"
#
# Output:
#
# 104 101 108 108 111
# 209 130 208 181 209 129 209 130
# 227 129 147 227 130 147 227 129 171 227 129 161 227 129 175
#
# Returns an enumerator if no block is given.
def each_byte; end # declaration only, so RDoc has a method to attach the comment to
# call-seq:
# each_line(line_sep = $/, chomp: false) {|substring| ... } -> self
# each_line(line_sep = $/, chomp: false) -> enumerator
#
# With a block given, forms the substrings ("lines")
# that are the result of splitting +self+
# at each occurrence of the given line separator +line_sep+;
# passes each line to the block;
# returns +self+:
#
# s = <<~EOT
# This is the first line.
# This is line two.
#
# This is line four.
# This is line five.
# EOT
#
# s.each_line {|line| p line }
#
# Output:
#
# "This is the first line.\n"
# "This is line two.\n"
# "\n"
# "This is line four.\n"
# "This is line five.\n"
#
# With a different +line_sep+:
#
# s.each_line(' is ') {|line| p line }
#
# Output:
#
# "This is "
# "the first line.\nThis is "
# "line two.\n\nThis is "
# "line four.\nThis is "
# "line five.\n"
#
# With +chomp+ as +true+, removes the trailing +line_sep+ from each line:
#
# s.each_line(chomp: true) {|line| p line }
#
# Output:
#
# "This is the first line."
# "This is line two."
# ""
# "This is line four."
# "This is line five."
#
# With an empty string as +line_sep+,
# forms and passes "paragraphs" by splitting at each occurrence
# of two or more newlines:
#
# s.each_line('') {|line| p line }
#
# Output:
#
# "This is the first line.\nThis is line two.\n\n"
# "This is line four.\nThis is line five.\n"
#
# With no block given, returns an enumerator.
#
def each_line; end # declaration only, so RDoc has a method to attach the comment to
# call-seq:
# split(field_sep = $;, limit = nil) -> array
# split(field_sep = $;, limit = nil) {|substring| ... } -> self
#
# Returns an array of substrings of +self+
# that are the result of splitting +self+
# at each occurrence of the given field separator +field_sep+.
#
# When +field_sep+ is <tt>$;</tt>:
#
# - If <tt>$;</tt> is +nil+ (its default value),
# the split occurs just as if +field_sep+ were given as a space character
# (see below).
#
# - If <tt>$;</tt> is a string,
# the split occurs just as if +field_sep+ were given as that string
# (see below).
#
# When +field_sep+ is <tt>' '</tt> and +limit+ is +nil+,
# the split occurs at each sequence of whitespace:
#
# 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"]
# "abc \n\tdef\t\n ghi".split(' ') # => ["abc", "def", "ghi"]
# 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"]
# ''.split(' ') # => []
#
# When +field_sep+ is a string different from <tt>' '</tt>
# and +limit+ is +nil+,
# the split occurs at each occurrence of +field_sep+;
# trailing empty substrings are not returned:
#
# 'abracadabra'.split('ab') # => ["", "racad", "ra"]
# 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"]
# ''.split('a') # => []
# '3.14159'.split('1') # => ["3.", "4", "59"]
# '!@#$%^$&*($)_+'.split('$') # => ["!@#", "%^", "&*(", ")_+"]
# 'тест'.split('т') # => ["", "ес"]
# 'こんにちは'.split('に') # => ["こん", "ちは"]
#
# When +field_sep+ is a Regexp and +limit+ is +nil+,
# the split occurs at each occurrence of a match;
# trailing empty substrings are not returned:
#
# 'abracadabra'.split(/ab/) # => ["", "racad", "ra"]
# 'aaabcdaaa'.split(/a/) # => ["", "", "", "bcd"]
# 'aaabcdaaa'.split(//) # => ["a", "a", "a", "b", "c", "d", "a", "a", "a"]
# '1 + 1 == 2'.split(/\W+/) # => ["1", "1", "2"]
#
# If the \Regexp contains groups, their matches are also included
# in the returned array:
#
# '1:2:3'.split(/(:)()()/, 2) # => ["1", ":", "", "", "2:3"]
#
# As seen above, if +limit+ is +nil+,
# trailing empty substrings are not returned;
# the same is true if +limit+ is zero:
#
# 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"]
# 'aaabcdaaa'.split('a', 0) # => ["", "", "", "bcd"]
#
# If +limit+ is positive integer +n+, no more than <tt>n - 1</tt>
# splits occur, so that at most +n+ substrings are returned,
# and trailing empty substrings are included:
#
# 'aaabcdaaa'.split('a', 1) # => ["aaabcdaaa"]
# 'aaabcdaaa'.split('a', 2) # => ["", "aabcdaaa"]
# 'aaabcdaaa'.split('a', 5) # => ["", "", "", "bcd", "aa"]
# 'aaabcdaaa'.split('a', 7) # => ["", "", "", "bcd", "", "", ""]
# 'aaabcdaaa'.split('a', 8) # => ["", "", "", "bcd", "", "", ""]
#
# Note that if +field_sep+ is a \Regexp containing groups,
# their matches are in the returned array, but do not count toward the limit.
#
# If +limit+ is negative, there is no limit on the number of splits
# (as when +limit+ is +nil+),
# but trailing empty substrings are included:
#
# 'aaabcdaaa'.split('a', -1) # => ["", "", "", "bcd", "", "", ""]
#
# If a block is given, it is called with each substring:
#
# 'abc def ghi'.split(' ') {|substring| p substring }
#
# Output:
#
# "abc"
# "def"
# "ghi"
#
def split; end # declaration only, so RDoc has a method to attach the comment to
end

6
doc/string/bytes.rdoc Normal file
View file

@ -0,0 +1,6 @@
Returns an array of the bytes in +self+:
'hello'.bytes # => [104, 101, 108, 108, 111]
'тест'.bytes # => [209, 130, 208, 181, 209, 129, 209, 130]
'こんにちは'.bytes
# => [227, 129, 147, 227, 130, 147, 227, 129, 171, 227, 129, 161, 227, 129, 175]

17
doc/string/each_byte.rdoc Normal file
View file

@ -0,0 +1,17 @@
Calls the given block with each successive byte from +self+;
returns +self+:
'hello'.each_byte {|byte| print byte, ' ' }
print "\n"
'тест'.each_byte {|byte| print byte, ' ' }
print "\n"
'こんにちは'.each_byte {|byte| print byte, ' ' }
print "\n"
Output:
104 101 108 108 111
209 130 208 181 209 129 209 130
227 129 147 227 130 147 227 129 171 227 129 161 227 129 175
Returns an enumerator if no block is given.

60
doc/string/each_line.rdoc Normal file
View file

@ -0,0 +1,60 @@
With a block given, forms the substrings ("lines")
that are the result of splitting +self+
at each occurrence of the given line separator +line_sep+;
passes each line to the block;
returns +self+:
s = <<~EOT
This is the first line.
This is line two.

This is line four.
This is line five.
EOT
s.each_line {|line| p line }
Output:
"This is the first line.\n"
"This is line two.\n"
"\n"
"This is line four.\n"
"This is line five.\n"
With a different +line_sep+:
s.each_line(' is ') {|line| p line }
Output:
"This is "
"the first line.\nThis is "
"line two.\n\nThis is "
"line four.\nThis is "
"line five.\n"
With +chomp+ as +true+, removes the trailing +line_sep+ from each line:
s.each_line(chomp: true) {|line| p line }
Output:
"This is the first line."
"This is line two."
""
"This is line four."
"This is line five."
With an empty string as +line_sep+,
forms and passes "paragraphs" by splitting at each occurrence
of two or more newlines:
s.each_line('') {|line| p line }
Output:
"This is the first line.\nThis is line two.\n\n"
"This is line four.\nThis is line five.\n"
With no block given, returns an enumerator.

37
doc/string/new.rdoc Normal file
View file

@ -0,0 +1,37 @@
Returns a new \String that is a copy of +string+.
With no arguments, returns the empty string with the Encoding <tt>ASCII-8BIT</tt>:
s = String.new
s # => ""
s.encoding # => #<Encoding:ASCII-8BIT>
With the single \String argument +string+, returns a copy of +string+
with the same encoding as +string+:
s = String.new('Que veut dire ça?')
s # => "Que veut dire ça?"
s.encoding # => #<Encoding:UTF-8>
Literal strings like <tt>""</tt> or here-documents always use
Encoding@Script+encoding, unlike String.new.
With keyword +encoding+, returns a copy of +string+
with the specified encoding:
s = String.new(encoding: 'ASCII')
s.encoding # => #<Encoding:US-ASCII>
s = String.new('foo', encoding: 'ASCII')
s.encoding # => #<Encoding:US-ASCII>
Note that these are equivalent:
s0 = String.new('foo', encoding: 'ASCII')
s1 = 'foo'.force_encoding('ASCII')
s0.encoding == s1.encoding # => true
With keyword +capacity+, returns a copy of +string+;
the given +capacity+ may set the size of the internal buffer,
which may affect performance:
String.new(capacity: 1) # => ""
String.new(capacity: 4096) # => ""
The +string+, +encoding+, and +capacity+ arguments may all be used together:
String.new('hello', encoding: 'UTF-8', capacity: 25)

84
doc/string/split.rdoc Normal file
View file

@ -0,0 +1,84 @@
Returns an array of substrings of +self+
that are the result of splitting +self+
at each occurrence of the given field separator +field_sep+.
When +field_sep+ is <tt>$;</tt>:
- If <tt>$;</tt> is +nil+ (its default value),
the split occurs just as if +field_sep+ were given as a space character
(see below).
- If <tt>$;</tt> is a string,
the split occurs just as if +field_sep+ were given as that string
(see below).
When +field_sep+ is <tt>' '</tt> and +limit+ is +nil+,
the split occurs at each sequence of whitespace:
'abc def ghi'.split(' ') # => ["abc", "def", "ghi"]
"abc \n\tdef\t\n ghi".split(' ') # => ["abc", "def", "ghi"]
'abc def ghi'.split(' ') # => ["abc", "def", "ghi"]
''.split(' ') # => []
When +field_sep+ is a string different from <tt>' '</tt>
and +limit+ is +nil+,
the split occurs at each occurrence of +field_sep+;
trailing empty substrings are not returned:
'abracadabra'.split('ab') # => ["", "racad", "ra"]
'aaabcdaaa'.split('a') # => ["", "", "", "bcd"]
''.split('a') # => []
'3.14159'.split('1') # => ["3.", "4", "59"]
'!@#$%^$&*($)_+'.split('$') # => ["!@#", "%^", "&*(", ")_+"]
'тест'.split('т') # => ["", "ес"]
'こんにちは'.split('に') # => ["こん", "ちは"]
When +field_sep+ is a Regexp and +limit+ is +nil+,
the split occurs at each occurrence of a match;
trailing empty substrings are not returned:
'abracadabra'.split(/ab/) # => ["", "racad", "ra"]
'aaabcdaaa'.split(/a/) # => ["", "", "", "bcd"]
'aaabcdaaa'.split(//) # => ["a", "a", "a", "b", "c", "d", "a", "a", "a"]
'1 + 1 == 2'.split(/\W+/) # => ["1", "1", "2"]
If the \Regexp contains groups, their matches are also included
in the returned array:
'1:2:3'.split(/(:)()()/, 2) # => ["1", ":", "", "", "2:3"]
As seen above, if +limit+ is +nil+,
trailing empty substrings are not returned;
the same is true if +limit+ is zero:
'aaabcdaaa'.split('a') # => ["", "", "", "bcd"]
'aaabcdaaa'.split('a', 0) # => ["", "", "", "bcd"]
If +limit+ is positive integer +n+, no more than <tt>n - 1</tt>
splits occur, so that at most +n+ substrings are returned,
and trailing empty substrings are included:
'aaabcdaaa'.split('a', 1) # => ["aaabcdaaa"]
'aaabcdaaa'.split('a', 2) # => ["", "aabcdaaa"]
'aaabcdaaa'.split('a', 5) # => ["", "", "", "bcd", "aa"]
'aaabcdaaa'.split('a', 7) # => ["", "", "", "bcd", "", "", ""]
'aaabcdaaa'.split('a', 8) # => ["", "", "", "bcd", "", "", ""]
Note that if +field_sep+ is a \Regexp containing groups,
their matches are in the returned array, but do not count toward the limit.
If +limit+ is negative, there is no limit on the number of splits
(as when +limit+ is +nil+),
but trailing empty substrings are included:
'aaabcdaaa'.split('a', -1) # => ["", "", "", "bcd", "", "", ""]
If a block is given, it is called with each substring:
'abc def ghi'.split(' ') {|substring| p substring }
Output:
"abc"
"def"
"ghi"

View file

@ -1810,7 +1810,17 @@ rb_ec_str_resurrect(struct rb_execution_context_struct *ec, VALUE str)
return ec_str_duplicate(ec, rb_cString, str);
}
/* :nodoc: documented in doc/string.rdoc */
/*
*
* call-seq:
* String.new(string = '') -> new_string
* String.new(string = '', encoding: encoding) -> new_string
* String.new(string = '', capacity: size) -> new_string
*
* :include: doc/string/new.rdoc
*
*/
static VALUE
rb_str_init(int argc, VALUE *argv, VALUE str)
{
@ -8652,7 +8662,14 @@ literal_split_pattern(VALUE spat, split_type_t default_type)
return default_type;
}
// String#split is documented at doc/string.rdoc.
/*
* call-seq:
* split(field_sep = $;, limit = nil) -> array
* split(field_sep = $;, limit = nil) {|substring| ... } -> self
*
* :include: doc/string/split.rdoc
*
*/
static VALUE
rb_str_split_m(int argc, VALUE *argv, VALUE str)
@ -9063,7 +9080,14 @@ rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, VALUE ary)
return orig;
}
// String#each_line is documented at doc/string.rdoc.
/*
* call-seq:
* each_line(line_sep = $/, chomp: false) {|substring| ... } -> self
* each_line(line_sep = $/, chomp: false) -> enumerator
*
* :include: doc/string/each_line.rdoc
*
*/
static VALUE
rb_str_each_line(int argc, VALUE *argv, VALUE str)
@ -9108,7 +9132,14 @@ rb_str_enumerate_bytes(VALUE str, VALUE ary)
return str;
}
// String#each_byte is documented in doc/string.rdoc.
/*
* call-seq:
* each_byte {|byte| ... } -> self
* each_byte -> enumerator
*
* :include: doc/string/each_byte.rdoc
*
*/
static VALUE
rb_str_each_byte(VALUE str)
@ -9117,7 +9148,13 @@ rb_str_each_byte(VALUE str)
return rb_str_enumerate_bytes(str, 0);
}
// String#bytes is documented in doc/string.rdoc.
/*
* call-seq:
* bytes -> array_of_bytes
*
* :include: doc/string/bytes.rdoc
*
*/
static VALUE
rb_str_bytes(VALUE str)