[DOC] Enhanced RDoc for String (#5742)

Treats: #force_encoding #b #valid_encoding? #ascii_only? #scrub #scrub! #unicode_normalized? Plus a couple of minor tweaks.
Merged-By: BurdetteLamar <BurdetteLamar@Yahoo.com>
2022-11-09 12:17:21 -05:00 · 2022-03-31 15:09:25 -05:00 · 2022-03-31 15:09:25 -05:00 · 056b7a8633 · 2022-04-01 05:09:44 +09:00
commit 056b7a8633
parent bb037f6d86
4 changed files with 101 additions and 69 deletions
--- a/doc/string/b.rdoc
+++ b/doc/string/b.rdoc
@ -0,0 +1,14 @@
+Returns a copy of +self+ that has ASCII-8BIT encoding;
+the underlying bytes are not modified:
+
+  s = "\x99"
+  s.encoding   # => #<Encoding:UTF-8>
+  t = s.b      # => "\x99"
+  t.encoding   # => #<Encoding:ASCII-8BIT>
+
+  s = "\u4095" # => "䂕"
+  s.encoding   # => #<Encoding:UTF-8>
+  s.bytes      # => [228, 130, 149]
+  t = s.b      # => "\xE4\x82\x95"
+  t.encoding   # => #<Encoding:ASCII-8BIT>
+  t.bytes      # => [228, 130, 149]
--- a/doc/string/force_encoding.rdoc
+++ b/doc/string/force_encoding.rdoc
@ -0,0 +1,20 @@
+Changes the encoding of +self+ to +encoding+,
+which may be a string encoding name or an Encoding object;
+returns self:
+
+  s = 'łał'
+  s.bytes                   # => [197, 130, 97, 197, 130]
+  s.encoding                # => #<Encoding:UTF-8>
+  s.force_encoding('ascii') # => "\xC5\x82a\xC5\x82"
+  s.encoding                # => #<Encoding:US-ASCII>
+
+Does not change the underlying bytes:
+
+  s.bytes                   # => [197, 130, 97, 197, 130]
+
+Makes the change even if the given +encoding+ is invalid
+for +self+ (as is the change above):
+
+  s.valid_encoding?                 # => false
+  s.force_encoding(Encoding::UTF_8) # => "łał"
+  s.valid_encoding?                 # => true
--- a/doc/string/scrub.rdoc
+++ b/doc/string/scrub.rdoc
@ -0,0 +1,25 @@
+Returns a copy of +self+ with each invalid byte sequence replaced
+by the given +replacement_string+.
+
+With no block given and no argument, replaces each invalid sequence
+with the default replacement string
+(<tt>"\uFFFD"</tt> for a Unicode encoding, <tt>'?'</tt> otherwise):
+
+  s = "foo\x81\x81bar"
+  s.scrub # => "foo\uFFFD\uFFFDbar"
+
+With no block given and argument +replacement_string+ given,
+replaces each invalid sequence with that string:
+
+  "foo\x81\x81bar".scrub('xyzzy') # => "fooxyzzyxyzzybar"
+
+With a block given, replaces each invalid sequence with the value
+of the block:
+
+  "foo\x81\x81bar".scrub {|bytes| p bytes; 'XYZZY' }
+  # => "fooXYZZYXYZZYbar"
+
+Output:
+
+  "\x81"
+  "\x81"
--- a/string.c
+++ b/string.c
@ -8416,7 +8416,7 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)

 /*
 *  call-seq:
- *     str.squeeze(*selectors) -> new_string
+ *    squeeze(*selectors) -> new_string
 *
 *  Returns a copy of +self+ with characters specified by +selectors+ "squeezed"
 *  (see {Multiple Character Selectors}[rdoc-ref:character_selectors.rdoc@Multiple+Character+Selectors]):
@ -10707,7 +10707,7 @@ rb_str_delete_suffix_bang(VALUE str, VALUE suffix)

 /*
 *  call-seq:
- *     str.delete_suffix(suffix) -> new_string
+ *    delete_suffix(suffix) -> new_string
 *
 *  :include: doc/string/delete_suffix.rdoc
 *
@ -10751,9 +10751,10 @@ rb_fs_setter(VALUE val, ID id, VALUE *var)

 /*
 *  call-seq:
- *     str.force_encoding(encoding)   -> str
+ *    force_encoding(encoding) -> self
+ *
+ *  :include: doc/string/force_encoding.rdoc
 *
- *  Changes the encoding to +encoding+ and returns self.
 */

 static VALUE
@ -10769,19 +10770,7 @@ rb_str_force_encoding(VALUE str, VALUE enc)
 *  call-seq:
 *    b -> string
 *
- *  Returns a copy of +self+ with that has ASCII-8BIT encoding;
- *  the contents (bytes) of +self+ are not modified:
- *
- *    s = "\x99"
- *    s.encoding   # => #<Encoding:UTF-8>
- *    t = s.b      # => "\x99"
- *    t.encoding   # => #<Encoding:ASCII-8BIT>
- *
- *    s = "\u4095"
- *    s.encoding   # => #<Encoding:UTF-8>
- *    s.bytes      # => [228, 130, 149]
- *    t = s.b      # => "\xE4\x82\x95"
- *    t.encoding   # => #<Encoding:ASCII-8BIT>
+ *  :include: doc/string/b.rdoc
 *
 */

@ -10802,13 +10791,13 @@ rb_str_b(VALUE str)

 /*
 *  call-seq:
- *     str.valid_encoding?  -> true or false
+ *    valid_encoding? -> true or false
 *
- *  Returns true for a string which is encoded correctly.
+ *  Returns +true+ if +self+ is encoded correctly, +false+ otherwise:
 *
- *    "\xc2\xa1".force_encoding("UTF-8").valid_encoding?  #=> true
- *    "\xc2".force_encoding("UTF-8").valid_encoding?      #=> false
- *    "\x80".force_encoding("UTF-8").valid_encoding?      #=> false
+ *    "\xc2\xa1".force_encoding("UTF-8").valid_encoding? # => true
+ *    "\xc2".force_encoding("UTF-8").valid_encoding?     # => false
+ *    "\x80".force_encoding("UTF-8").valid_encoding?     # => false
 */

 static VALUE
@ -10821,12 +10810,14 @@ rb_str_valid_encoding_p(VALUE str)

 /*
 *  call-seq:
- *     str.ascii_only?  -> true or false
+ *    ascii_only? -> true or false
 *
- *  Returns true for a string which has only ASCII characters.
+ *  Returns +true+ if +self+ contains only ASCII characters,
+ *  +false+ otherwise:
+ *
+ *    'abc'.ascii_only?         # => true
+ *    "abc\u{6666}".ascii_only? # => false
 *
- *    "abc".force_encoding("UTF-8").ascii_only?          #=> true
- *    "abc\u{6666}".force_encoding("UTF-8").ascii_only?  #=> false
 */

 static VALUE
@ -11158,37 +11149,10 @@ enc_str_scrub(rb_encoding *enc, VALUE str, VALUE repl, int cr)

 /*
 *  call-seq:
- *    scrub(replacement_string = default_replacement) -> string
- *    scrub{|bytes| ... } -> string
+ *    scrub(replacement_string = default_replacement) -> new_string
+ *    scrub{|bytes| ... } -> new_string
 *
- *  Returns a copy of self with each invalid byte sequence replaced
- *  by a replacement string.
- *
- *  With no block given and no argument, replaces each invalid sequence
- *  with the default replacement string
- *  (<tt>"\uFFFD"</tt> for a Unicode encoding, <tt>'?'</tt> otherwise):
- *
- *    "\uFFFD".bytes # => [239, 191, 189]
- *    s = "foo\x81\x81bar"
- *    s.bytes
- *    # => [102, 111, 111, 129, 129, 98, 97, 114]
- *    s.scrub.bytes
- *    # => [102, 111, 111, 239, 191, 189, 239, 191, 189, 98, 97, 114]
- *
- *  With no block given and argument +replacement_string+ given,
- *  replaces each invalid sequence with that string:
- *
- *    "foo\x81\x81bar".scrub('xyzzy') # => "fooxyzzyxyzzybar"
- *
- *  With a block given, replaces each invalid sequence with the value
- *  of the block:
- *
- *    "foo\x81\x81bar".scrub {|bytes| p bytes; 'XYZZY' } # => "fooXYZZYXYZZYbar"
- *
- *  Output:
- *
- *    "\x81"
- *    "\x81"
+ *  :include: doc/string/scrub.rdoc
 *
 */
 static VALUE
@ -11203,7 +11167,7 @@ str_scrub(int argc, VALUE *argv, VALUE str)
 *  call-seq:
 *    scrub! -> self
 *    scrub!(replacement_string = default_replacement) -> self
- *    scrub!{|bytes|} -> self
+ *    scrub!{|bytes| ... } -> self
 *
 *  Like String#scrub, except that any replacements are made in +self+.
 *
@ -11267,6 +11231,7 @@ unicode_normalize_common(int argc, VALUE *argv, VALUE str, ID id)
 *    "a\u0300".unicode_normalize      # => "a"
 *    "\u00E0".unicode_normalize(:nfd) # => "a "
 *
+ *  Related: String#unicode_normalize!, String#unicode_normalized?.
 */
 static VALUE
 rb_str_unicode_normalize(int argc, VALUE *argv, VALUE str)
@ -11281,6 +11246,8 @@ rb_str_unicode_normalize(int argc, VALUE *argv, VALUE str)
 *  Like String#unicode_normalize, except that the normalization
 *  is performed on +self+.
 *
+ *  Related: String#unicode_normalized?.
+ *
 */
 static VALUE
 rb_str_unicode_normalize_bang(int argc, VALUE *argv, VALUE str)
@ -11289,21 +11256,27 @@ rb_str_unicode_normalize_bang(int argc, VALUE *argv, VALUE str)
 }

 /*  call-seq:
- *    str.unicode_normalized?(form=:nfc)
+ *   unicode_normalized?(form = :nfc) -> true or false
 *
- *  Checks whether +str+ is in Unicode normalization form +form+,
- *  which can be any of the four values +:nfc+, +:nfd+, +:nfkc+, or +:nfkd+.
- *  The default is +:nfc+.
+ *  Returns +true+ if +self+ is in the given +form+ of Unicode normalization,
+ *  +false+ otherwise.
+ *  The +form+ must be one of +:nfc+, +:nfd+, +:nfkc+, or +:nfkd+.
 *
- *  If the string is not in a Unicode Encoding, then an Exception is raised.
- *  For details, see String#unicode_normalize.
+ *  Examples:
+ *
+ *    "a\u0300".unicode_normalized?       # => false
+ *    "a\u0300".unicode_normalized?(:nfd) # => true
+ *    "\u00E0".unicode_normalized?        # => true
+ *    "\u00E0".unicode_normalized?(:nfd)  # => false
+ *
+ *
+ *  Raises an exception if +self+ is not in a Unicode encoding:
+ *
+ *    s = "\xE0".force_encoding('ISO-8859-1')
+ *    s.unicode_normalized? # Raises Encoding::CompatibilityError.
+ *
+ *  Related: String#unicode_normalize, String#unicode_normalize!.
 *
- *    "a\u0300".unicode_normalized?        #=> false
- *    "a\u0300".unicode_normalized?(:nfd)  #=> true
- *    "\u00E0".unicode_normalized?         #=> true
- *    "\u00E0".unicode_normalized?(:nfd)   #=> false
- *    "\xE0".force_encoding('ISO-8859-1').unicode_normalized?
- *                                         #=> Encoding::CompatibilityError raised
 */
 static VALUE
 rb_str_unicode_normalized_p(int argc, VALUE *argv, VALUE str)