mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
[DOC] Enhanced RDoc for String (#5685)
Treats: #chars, #codepoints, #each_char, #each_codepoint, #each_grapheme_cluster, #grapheme_clusters. Also corrects a passage in #unicode_normalize that mentioned module UnicodeNormalize, whose doc (:nodoc:, actually) says not to mention it.
This commit is contained in:
parent
26aff37466
commit
0140e6c41e
Notes:
git
2022-03-23 04:51:26 +09:00
Merged-By: BurdetteLamar <BurdetteLamar@Yahoo.com>
7 changed files with 88 additions and 58 deletions
5
doc/string/chars.rdoc
Normal file
5
doc/string/chars.rdoc
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
Returns an array of the characters in +self+:
|
||||||
|
|
||||||
|
'hello'.chars # => ["h", "e", "l", "l", "o"]
|
||||||
|
'тест'.chars # => ["т", "е", "с", "т"]
|
||||||
|
'こんにちは'.chars # => ["こ", "ん", "に", "ち", "は"]
|
6
doc/string/codepoints.rdoc
Normal file
6
doc/string/codepoints.rdoc
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
Returns an array of the codepoints in +self+;
|
||||||
|
each codepoint is the integer value for a character:
|
||||||
|
|
||||||
|
'hello'.codepoints # => [104, 101, 108, 108, 111]
|
||||||
|
'тест'.codepoints # => [1090, 1077, 1089, 1090]
|
||||||
|
'こんにちは'.codepoints # => [12371, 12435, 12395, 12385, 12399]
|
17
doc/string/each_char.rdoc
Normal file
17
doc/string/each_char.rdoc
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
Calls the given block with each successive character from +self+;
|
||||||
|
returns +self+:
|
||||||
|
|
||||||
|
'hello'.each_char {|char| print char, ' ' }
|
||||||
|
print "\n"
|
||||||
|
'тест'.each_char {|char| print char, ' ' }
|
||||||
|
print "\n"
|
||||||
|
'こんにちは'.each_char {|char| print char, ' ' }
|
||||||
|
print "\n"
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
h e l l o
|
||||||
|
т е с т
|
||||||
|
こ ん に ち は
|
||||||
|
|
||||||
|
Returns an enumerator if no block is given.
|
18
doc/string/each_codepoint.rdoc
Normal file
18
doc/string/each_codepoint.rdoc
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
Calls the given block with each successive codepoint from +self+;
|
||||||
|
each codepoint is the integer value for a character;
|
||||||
|
returns +self+:
|
||||||
|
|
||||||
|
'hello'.each_codepoint {|codepoint| print codepoint, ' ' }
|
||||||
|
print "\n"
|
||||||
|
'тест'.each_codepoint {|codepoint| print codepoint, ' ' }
|
||||||
|
print "\n"
|
||||||
|
'こんにちは'.each_codepoint {|codepoint| print codepoint, ' ' }
|
||||||
|
print "\n"
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
104 101 108 108 111
|
||||||
|
1090 1077 1089 1090
|
||||||
|
12371 12435 12395 12385 12399
|
||||||
|
|
||||||
|
Returns an enumerator if no block is given.
|
12
doc/string/each_grapheme_cluster.rdoc
Normal file
12
doc/string/each_grapheme_cluster.rdoc
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
Calls the given block with each successive grapheme cluster from +self+
|
||||||
|
(see {Unicode Grapheme Cluster Boundaries}[https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries]);
|
||||||
|
returns +self+:
|
||||||
|
|
||||||
|
s = "\u0061\u0308-pqr-\u0062\u0308-xyz-\u0063\u0308" # => "ä-pqr-b̈-xyz-c̈"
|
||||||
|
s.each_grapheme_cluster {|gc| print gc, ' ' }
|
||||||
|
|
||||||
|
Output:
|
||||||
|
|
||||||
|
ä - p q r - b̈ - x y z - c̈
|
||||||
|
|
||||||
|
Returns an enumerator if no block is given.
|
6
doc/string/grapheme_clusters.rdoc
Normal file
6
doc/string/grapheme_clusters.rdoc
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
Returns an array of the grapheme clusters in +self+
|
||||||
|
(see {Unicode Grapheme Cluster Boundaries}[https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries]):
|
||||||
|
|
||||||
|
s = "\u0061\u0308-pqr-\u0062\u0308-xyz-\u0063\u0308" # => "ä-pqr-b̈-xyz-c̈"
|
||||||
|
s.grapheme_clusters
|
||||||
|
# => ["ä", "-", "p", "q", "r", "-", "b̈", "-", "x", "y", "z", "-", "c̈"]
|
82
string.c
82
string.c
|
@ -9203,17 +9203,11 @@ rb_str_enumerate_chars(VALUE str, VALUE ary)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* str.each_char {|cstr| block } -> str
|
* each_char {|c| ... } -> self
|
||||||
* str.each_char -> an_enumerator
|
* each_char -> enumerator
|
||||||
*
|
*
|
||||||
* Passes each character in <i>str</i> to the given block, or returns
|
* :include: doc/string/each_char.rdoc
|
||||||
* an enumerator if no block is given.
|
|
||||||
*
|
*
|
||||||
* "hello".each_char {|c| print c, ' ' }
|
|
||||||
*
|
|
||||||
* <em>produces:</em>
|
|
||||||
*
|
|
||||||
* h e l l o
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
|
@ -9225,13 +9219,10 @@ rb_str_each_char(VALUE str)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* str.chars -> an_array
|
* chars -> array_of_characters
|
||||||
*
|
*
|
||||||
* Returns an array of characters in <i>str</i>. This is a shorthand
|
* :include: doc/string/chars.rdoc
|
||||||
* for <code>str.each_char.to_a</code>.
|
|
||||||
*
|
*
|
||||||
* If a block is given, which is a deprecated form, works the same as
|
|
||||||
* <code>each_char</code>.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
|
@ -9272,22 +9263,11 @@ rb_str_enumerate_codepoints(VALUE str, VALUE ary)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* str.each_codepoint {|integer| block } -> str
|
* each_codepoint {|integer| ... } -> self
|
||||||
* str.each_codepoint -> an_enumerator
|
* each_codepoint -> enumerator
|
||||||
*
|
*
|
||||||
* Passes the Integer ordinal of each character in <i>str</i>,
|
* :include: doc/string/each_codepoint.rdoc
|
||||||
* also known as a <i>codepoint</i> when applied to Unicode strings to the
|
|
||||||
* given block. For encodings other than UTF-8/UTF-16(BE|LE)/UTF-32(BE|LE),
|
|
||||||
* values are directly derived from the binary representation
|
|
||||||
* of each character.
|
|
||||||
*
|
*
|
||||||
* If no block is given, an enumerator is returned instead.
|
|
||||||
*
|
|
||||||
* "hello\u0639".each_codepoint {|c| print c, ' ' }
|
|
||||||
*
|
|
||||||
* <em>produces:</em>
|
|
||||||
*
|
|
||||||
* 104 101 108 108 111 1593
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
|
@ -9299,14 +9279,10 @@ rb_str_each_codepoint(VALUE str)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* str.codepoints -> an_array
|
* codepoints -> array_of_integers
|
||||||
*
|
*
|
||||||
* Returns an array of the Integer ordinals of the
|
* :include: doc/string/codepoints.rdoc
|
||||||
* characters in <i>str</i>. This is a shorthand for
|
|
||||||
* <code>str.each_codepoint.to_a</code>.
|
|
||||||
*
|
*
|
||||||
* If a block is given, which is a deprecated form, works the same as
|
|
||||||
* <code>each_codepoint</code>.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
|
@ -9427,16 +9403,10 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* str.each_grapheme_cluster {|cstr| block } -> str
|
* each_grapheme_cluster {|gc| ... } -> self
|
||||||
* str.each_grapheme_cluster -> an_enumerator
|
* each_grapheme_cluster -> enumerator
|
||||||
*
|
*
|
||||||
* Passes each grapheme cluster in <i>str</i> to the given block, or returns
|
* :include: doc/string/each_grapheme_cluster.rdoc
|
||||||
* an enumerator if no block is given.
|
|
||||||
* Unlike String#each_char, this enumerates by grapheme clusters defined by
|
|
||||||
* Unicode Standard Annex #29 http://unicode.org/reports/tr29/
|
|
||||||
*
|
|
||||||
* "a\u0300".each_char.to_a.size #=> 2
|
|
||||||
* "a\u0300".each_grapheme_cluster.to_a.size #=> 1
|
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -9449,13 +9419,10 @@ rb_str_each_grapheme_cluster(VALUE str)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* call-seq:
|
* call-seq:
|
||||||
* str.grapheme_clusters -> an_array
|
* grapheme_clusters -> array_of_grapheme_clusters
|
||||||
*
|
*
|
||||||
* Returns an array of grapheme clusters in <i>str</i>. This is a shorthand
|
* :include: doc/string/grapheme_clusters.rdoc
|
||||||
* for <code>str.each_grapheme_cluster.to_a</code>.
|
|
||||||
*
|
*
|
||||||
* If a block is given, which is a deprecated form, works the same as
|
|
||||||
* <code>each_grapheme_cluster</code>.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static VALUE
|
static VALUE
|
||||||
|
@ -11363,17 +11330,16 @@ unicode_normalize_common(int argc, VALUE *argv, VALUE str, ID id)
|
||||||
* - +:nfkc+: Compatibility decomposition, followed by canonical composition.
|
* - +:nfkc+: Compatibility decomposition, followed by canonical composition.
|
||||||
* - +:nfkd+: Compatibility decomposition.
|
* - +:nfkd+: Compatibility decomposition.
|
||||||
*
|
*
|
||||||
* +self+ must have encoding UTF-8 or one of the other supported encodings:
|
* The encoding of +self+ must be one of:
|
||||||
*
|
*
|
||||||
* UnicodeNormalize::UNICODE_ENCODINGS
|
* - Encoding::UTF_8
|
||||||
* # =>
|
* - Encoding::UTF_16BE
|
||||||
* [#<Encoding:UTF-16BE (autoload)>,
|
* - Encoding::UTF_16LE
|
||||||
* #<Encoding:UTF-16LE>,
|
* - Encoding::UTF_32BE
|
||||||
* #<Encoding:UTF-32BE (autoload)>,
|
* - Encoding::UTF_32LE
|
||||||
* #<Encoding:UTF-32LE (autoload)>,
|
* - Encoding::GB18030
|
||||||
* #<Encoding:GB18030 (autoload)>,
|
* - Encoding::UCS_2BE
|
||||||
* #<Encoding:UTF-16BE (autoload)>,
|
* - Encoding::UCS_4BE
|
||||||
* #<Encoding:UTF-32BE (autoload)>]
|
|
||||||
*
|
*
|
||||||
* Examples:
|
* Examples:
|
||||||
*
|
*
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue