diff --git a/doc/string/chars.rdoc b/doc/string/chars.rdoc
new file mode 100644
index 0000000000..d24a1cc3a9
--- /dev/null
+++ b/doc/string/chars.rdoc
@@ -0,0 +1,5 @@
+Returns an array of the characters in +self+:
+
+ 'hello'.chars # => ["h", "e", "l", "l", "o"]
+ 'тест'.chars # => ["т", "е", "с", "т"]
+ 'こんにちは'.chars # => ["こ", "ん", "に", "ち", "は"]
diff --git a/doc/string/codepoints.rdoc b/doc/string/codepoints.rdoc
new file mode 100644
index 0000000000..0c55d3f4b9
--- /dev/null
+++ b/doc/string/codepoints.rdoc
@@ -0,0 +1,6 @@
+Returns an array of the codepoints in +self+;
+each codepoint is the integer value for a character:
+
+ 'hello'.codepoints # => [104, 101, 108, 108, 111]
+ 'тест'.codepoints # => [1090, 1077, 1089, 1090]
+ 'こんにちは'.codepoints # => [12371, 12435, 12395, 12385, 12399]
diff --git a/doc/string/each_char.rdoc b/doc/string/each_char.rdoc
new file mode 100644
index 0000000000..e5ae5a1812
--- /dev/null
+++ b/doc/string/each_char.rdoc
@@ -0,0 +1,17 @@
+Calls the given block with each successive character from +self+;
+returns +self+:
+
+ 'hello'.each_char {|char| print char, ' ' }
+ print "\n"
+ 'тест'.each_char {|char| print char, ' ' }
+ print "\n"
+ 'こんにちは'.each_char {|char| print char, ' ' }
+ print "\n"
+
+Output:
+
+ h e l l o
+ т е с т
+ こ ん に ち は
+
+Returns an enumerator if no block is given.
diff --git a/doc/string/each_codepoint.rdoc b/doc/string/each_codepoint.rdoc
new file mode 100644
index 0000000000..88bfcbd1c0
--- /dev/null
+++ b/doc/string/each_codepoint.rdoc
@@ -0,0 +1,18 @@
+Calls the given block with each successive codepoint from +self+;
+each codepoint is the integer value for a character;
+returns +self+:
+
+ 'hello'.each_codepoint {|codepoint| print codepoint, ' ' }
+ print "\n"
+ 'тест'.each_codepoint {|codepoint| print codepoint, ' ' }
+ print "\n"
+ 'こんにちは'.each_codepoint {|codepoint| print codepoint, ' ' }
+ print "\n"
+
+Output:
+
+ 104 101 108 108 111
+ 1090 1077 1089 1090
+ 12371 12435 12395 12385 12399
+
+Returns an enumerator if no block is given.
diff --git a/doc/string/each_grapheme_cluster.rdoc b/doc/string/each_grapheme_cluster.rdoc
new file mode 100644
index 0000000000..40be95fcac
--- /dev/null
+++ b/doc/string/each_grapheme_cluster.rdoc
@@ -0,0 +1,12 @@
+Calls the given block with each successive grapheme cluster from +self+
+(see {Unicode Grapheme Cluster Boundaries}[https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries]);
+returns +self+:
+
+ s = "\u0061\u0308-pqr-\u0062\u0308-xyz-\u0063\u0308" # => "ä-pqr-b̈-xyz-c̈"
+ s.each_grapheme_cluster {|gc| print gc, ' ' }
+
+Output:
+
+ ä - p q r - b̈ - x y z - c̈
+
+Returns an enumerator if no block is given.
diff --git a/doc/string/grapheme_clusters.rdoc b/doc/string/grapheme_clusters.rdoc
new file mode 100644
index 0000000000..8c7f5a7259
--- /dev/null
+++ b/doc/string/grapheme_clusters.rdoc
@@ -0,0 +1,6 @@
+Returns an array of the grapheme clusters in +self+
+(see {Unicode Grapheme Cluster Boundaries}[https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries]):
+
+ s = "\u0061\u0308-pqr-\u0062\u0308-xyz-\u0063\u0308" # => "ä-pqr-b̈-xyz-c̈"
+ s.grapheme_clusters
+ # => ["ä", "-", "p", "q", "r", "-", "b̈", "-", "x", "y", "z", "-", "c̈"]
diff --git a/string.c b/string.c
index f46cba1a6a..f40d850d5a 100644
--- a/string.c
+++ b/string.c
@@ -9203,17 +9203,11 @@ rb_str_enumerate_chars(VALUE str, VALUE ary)
/*
* call-seq:
- * str.each_char {|cstr| block } -> str
- * str.each_char -> an_enumerator
+ * each_char {|c| ... } -> self
+ * each_char -> enumerator
*
- * Passes each character in str to the given block, or returns
- * an enumerator if no block is given.
+ * :include: doc/string/each_char.rdoc
*
- * "hello".each_char {|c| print c, ' ' }
- *
- * produces:
- *
- * h e l l o
*/
static VALUE
@@ -9225,13 +9219,10 @@ rb_str_each_char(VALUE str)
/*
* call-seq:
- * str.chars -> an_array
+ * chars -> array_of_characters
*
- * Returns an array of characters in str. This is a shorthand
- * for <code>str.each_char.to_a</code>.
+ * :include: doc/string/chars.rdoc
*
- * If a block is given, which is a deprecated form, works the same as
- * <code>each_char</code>.
*/
static VALUE
@@ -9272,22 +9263,11 @@ rb_str_enumerate_codepoints(VALUE str, VALUE ary)
/*
* call-seq:
- * str.each_codepoint {|integer| block } -> str
- * str.each_codepoint -> an_enumerator
+ * each_codepoint {|integer| ... } -> self
+ * each_codepoint -> enumerator
*
- * Passes the Integer ordinal of each character in str,
- * also known as a codepoint when applied to Unicode strings to the
- * given block. For encodings other than UTF-8/UTF-16(BE|LE)/UTF-32(BE|LE),
- * values are directly derived from the binary representation
- * of each character.
+ * :include: doc/string/each_codepoint.rdoc
*
- * If no block is given, an enumerator is returned instead.
- *
- * "hello\u0639".each_codepoint {|c| print c, ' ' }
- *
- * produces:
- *
- * 104 101 108 108 111 1593
*/
static VALUE
@@ -9299,14 +9279,10 @@ rb_str_each_codepoint(VALUE str)
/*
* call-seq:
- * str.codepoints -> an_array
+ * codepoints -> array_of_integers
*
- * Returns an array of the Integer ordinals of the
- * characters in str. This is a shorthand for
- * <code>str.each_codepoint.to_a</code>.
+ * :include: doc/string/codepoints.rdoc
*
- * If a block is given, which is a deprecated form, works the same as
- * <code>each_codepoint</code>.
*/
static VALUE
@@ -9427,16 +9403,10 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
/*
* call-seq:
- * str.each_grapheme_cluster {|cstr| block } -> str
- * str.each_grapheme_cluster -> an_enumerator
+ * each_grapheme_cluster {|gc| ... } -> self
+ * each_grapheme_cluster -> enumerator
*
- * Passes each grapheme cluster in str to the given block, or returns
- * an enumerator if no block is given.
- * Unlike String#each_char, this enumerates by grapheme clusters defined by
- * Unicode Standard Annex #29 http://unicode.org/reports/tr29/
- *
- * "a\u0300".each_char.to_a.size #=> 2
- * "a\u0300".each_grapheme_cluster.to_a.size #=> 1
+ * :include: doc/string/each_grapheme_cluster.rdoc
*
*/
@@ -9449,13 +9419,10 @@ rb_str_each_grapheme_cluster(VALUE str)
/*
* call-seq:
- * str.grapheme_clusters -> an_array
+ * grapheme_clusters -> array_of_grapheme_clusters
*
- * Returns an array of grapheme clusters in str. This is a shorthand
- * for <code>str.each_grapheme_cluster.to_a</code>.
+ * :include: doc/string/grapheme_clusters.rdoc
*
- * If a block is given, which is a deprecated form, works the same as
- * <code>each_grapheme_cluster</code>.
*/
static VALUE
@@ -11363,17 +11330,16 @@ unicode_normalize_common(int argc, VALUE *argv, VALUE str, ID id)
* - +:nfkc+: Compatibility decomposition, followed by canonical composition.
* - +:nfkd+: Compatibility decomposition.
*
- * +self+ must have encoding UTF-8 or one of the other supported encodings:
+ * The encoding of +self+ must be one of:
*
- * UnicodeNormalize::UNICODE_ENCODINGS
- * # =>
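- * [#<Encoding:UTF-16BE>,
- * #<Encoding:UTF-16LE>,
- * #<Encoding:UTF-32BE>,
- * #<Encoding:UTF-32LE>,
- * #<Encoding:GB18030>,
- * #<Encoding:UCS-2BE>,
- * #<Encoding:UCS-4BE>]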
+ * - Encoding::UTF_8
+ * - Encoding::UTF_16BE
+ * - Encoding::UTF_16LE
+ * - Encoding::UTF_32BE
+ * - Encoding::UTF_32LE
+ * - Encoding::GB18030
+ * - Encoding::UCS_2BE
+ * - Encoding::UCS_4BE
*
* Examples:
*