diff --git a/doc/examples/files.rdoc b/doc/examples/files.rdoc new file mode 100644 index 0000000000..f736132770 --- /dev/null +++ b/doc/examples/files.rdoc @@ -0,0 +1,26 @@ +# English text with newlines. +text = <<~EOT + First line + Second line + + Fourth line + Fifth line +EOT + +# Russian text. +russian = "\u{442 435 441 442}" # => "тест" + +# Binary data. +data = "\u9990\u9991\u9992\u9993\u9994" + +# Text file. +File.write('t.txt', text) + +# File with Russian text. +File.write('t.rus', russian) + +# File with binary data. +f = File.new('t.dat', 'wb:UTF-16') +f.write(data) +f.close + diff --git a/doc/io_streams.rdoc b/doc/io_streams.rdoc new file mode 100644 index 0000000000..b686d67eb5 --- /dev/null +++ b/doc/io_streams.rdoc @@ -0,0 +1,350 @@ +== \IO Streams + +Ruby supports processing data as \IO streams; +that is, as data that may be read, re-read, written, re-written, +and traversed via iteration. + +Core classes with such support include: + +- IO, and its derived class File. +- {StringIO}[rdoc-ref:StringIO]: for processing a string. +- {ARGF}[rdoc-ref:ARGF]: for processing files cited on the command line. + +Pre-existing stream objects that are referenced by global variables or constants include: + +- $stdin: read-only instance of \IO. +- $stdout: write-only instance of \IO. +- $stderr: write-only instance of \IO. +- \ARGF: read-only instance of \ARGF. + +You can create stream objects: + +- \File: + + - File.new: returns a new \File object. + - File.open: passes a new \File object to the given block. + +- \IO: + + - IO.new: returns a new \IO object for the given integer file descriptor. + - IO.open: passes a new \IO object to the given block. + - IO.popen: returns a new \IO object that is connected to the $stdin + and $stdout of a newly-launched subprocess. + - Kernel#open: returns a new \IO object connected to a given source: + stream, file, or subprocess. + +- \StringIO: + + - StringIO.new: returns a new \StringIO object. 
+ - StringIO.open: passes a new \StringIO object to the given block. + +(You cannot create an \ARGF object, but one already exists.) + +=== About the Examples + +Many examples here use these variables: + + # English text with newlines. + text = <<~EOT + First line + Second line + + Fourth line + Fifth line + EOT + + # Russian text. + russian = "\u{442 435 441 442}" # => "тест" + + # Binary data. + data = "\u9990\u9991\u9992\u9993\u9994" + + # Text file. + File.write('t.txt', text) + + # File with Russian text. + File.write('t.rus', russian) + + # File with binary data. + f = File.new('t.dat', 'wb:UTF-16') + f.write(data) + f.close + +=== Position + +An \IO stream has a nonnegative integer _position_, +which is the byte offset at which the next read or write is to occur; +the relevant methods: + +- +#tell+ (aliased as #pos): Returns the current position (in bytes) in the stream: + + f = File.new('t.txt') + f.tell # => 0 + f.gets # => "First line\n" + f.tell # => 11 + f.close + +- +#pos=+: Sets the position of the stream (in bytes): + + f = File.new('t.txt') + f.tell # => 0 + f.pos = 20 # => 20 + f.tell # => 20 + f.close + +- +#seek+: Sets the position of the stream to a given integer +offset+ + (in bytes), with respect to a given constant +whence+, which is one of: + + - +:CUR+ or IO::SEEK_CUR: + Repositions the stream to its current position plus the given +offset+: + + f = File.new('t.txt') + f.tell # => 0 + f.seek(20, :CUR) # => 0 + f.tell # => 20 + f.seek(-10, :CUR) # => 0 + f.tell # => 10 + f.close + + - +:END+ or IO::SEEK_END: + Repositions the stream to its end plus the given +offset+: + + f = File.new('t.txt') + f.tell # => 0 + f.seek(0, :END) # => 0 # Repositions to stream end. 
+ f.tell # => 47 + f.seek(-20, :END) # => 0 + f.tell # => 27 + f.seek(-40, :END) # => 0 + f.tell # => 7 + f.close + + - +:SET+ or IO::SEEK_SET: + Repositions the stream to the given +offset+: + + f = File.new('t.txt') + f.tell # => 0 + f.seek(20, :SET) # => 0 + f.tell # => 20 + f.seek(40, :SET) # => 0 + f.tell # => 40 + f.close + +- +#rewind+: Positions the stream to the beginning: + + f = File.new('t.txt') + f.tell # => 0 + f.gets # => "First line\n" + f.tell # => 11 + f.rewind # => 0 + f.tell # => 0 + f.close + +=== Lines + +Some reader methods in \IO streams are line-oriented; +such a method reads one or more lines, +which are separated by an implicit or explicit line separator. + +These methods are included (except as noted) in classes Kernel, IO, File, +and {ARGF}[rdoc-ref:ARGF]: + +- +#each_line+ - passes each line to the block; not in Kernel: + + f = File.new('t.txt') + f.each_line {|line| p line } + + Output: + + "First line\n" + "Second line\n" + "\n" + "Fourth line\n" + "Fifth line\n" + + The reading may begin mid-line: + + f = File.new('t.txt') + f.pos = 27 + f.each_line {|line| p line } + + Output: + + "rth line\n" + "Fifth line\n" + +- +#gets+ - returns the next line (which may begin mid-line): + + f = File.new('t.txt') + f.gets # => "First line\n" + f.gets # => "Second line\n" + f.pos = 27 + f.gets # => "rth line\n" + f.readlines # => ["Fifth line\n"] + f.gets # => nil + +- +#readline+ - like #gets, but raises an exception at end-of-file; + not in StringIO. + +- +#readlines+ - returns all remaining lines in an array; + may begin mid-line: + + f = File.new('t.txt') + f.pos = 19 + f.readlines # => ["ine\n", "\n", "Fourth line\n", "Fifth line\n"] + f.readlines # => [] + +Each of these methods may be called with: + +- An optional line separator, +sep+. +- An optional line-size limit, +limit+. +- Both +sep+ and +limit+. + +==== Line Separator + +The default line separator is given by the global variable $/, +whose value is by default "\n". 
+The line to be read next is all data from the current position +to the next line separator: + + f = File.new('t.txt') + f.gets # => "First line\n" + f.gets # => "Second line\n" + f.gets # => "\n" + f.gets # => "Fourth line\n" + f.gets # => "Fifth line\n" + f.close + +You can specify a different line separator: + + f = File.new('t.txt') + f.gets('l') # => "First l" + f.gets('li') # => "ine\nSecond li" + f.gets('lin') # => "ne\n\nFourth lin" + f.gets # => "e\n" + f.close + +There are two special line separators: + +- +nil+: The entire stream is read into a single string: + + f = File.new('t.txt') + f.gets(nil) # => "First line\nSecond line\n\nFourth line\nFifth line\n" + f.close + +- '' (the empty string): The next "paragraph" is read + (paragraphs being separated by two consecutive line separators): + + f = File.new('t.txt') + f.gets('') # => "First line\nSecond line\n\n" + f.gets('') # => "Fourth line\nFifth line\n" + f.close + +==== Line Limit + +The line to be read may be further defined by an optional integer argument +limit+, +which specifies that the number of bytes returned may not be (much) longer +than the given +limit+; +a multi-byte character will not be split, and so a line may be slightly longer +than the given limit. + +If +limit+ is not given, the line is determined only by +sep+. + + # Text with 1-byte characters. + File.open('t.txt') {|f| f.gets(1) } # => "F" + File.open('t.txt') {|f| f.gets(2) } # => "Fi" + File.open('t.txt') {|f| f.gets(3) } # => "Fir" + File.open('t.txt') {|f| f.gets(4) } # => "Firs" + # No more than one line. + File.open('t.txt') {|f| f.gets(10) } # => "First line" + File.open('t.txt') {|f| f.gets(11) } # => "First line\n" + File.open('t.txt') {|f| f.gets(12) } # => "First line\n" + + # Text with 2-byte characters, which will not be split. 
+ File.open('t.rus') {|f| f.gets(1).size } # => 1 + File.open('t.rus') {|f| f.gets(2).size } # => 1 + File.open('t.rus') {|f| f.gets(3).size } # => 2 + File.open('t.rus') {|f| f.gets(4).size } # => 2 + +==== Line Separator and Line Limit + +With arguments +sep+ and +limit+ given, +combines the two behaviors: + +- Returns the next line as determined by line separator +sep+. +- But returns no more bytes than are allowed by the limit. + +Example: + + File.open('t.txt') {|f| f.gets('li', 20) } # => "First li" + File.open('t.txt') {|f| f.gets('li', 2) } # => "Fi" + +==== Line Number + +A readable \IO stream has a _line_ _number_, +which is the non-negative integer line number +in the stream where the next read will occur. + +A new stream initially has line number +0+. + +\Method IO#lineno returns the line number. + +Reading lines from a stream usually changes its line number: + + f = File.new('t.txt', 'r') + f.lineno # => 0 + f.readline # => "First line\n" + f.lineno # => 1 + f.readline # => "Second line\n" + f.lineno # => 2 + f.readline # => "\n" + f.lineno # => 3 + f.eof? # => false + f.close + +Iterating over lines in a stream usually changes its line number: + + f = File.new('t.txt') + f.each_line do |line| + p "position=#{f.pos} eof?=#{f.eof?} lineno=#{f.lineno}" + end + f.close + +Output: + + "position=11 eof?=false lineno=1" + "position=23 eof?=false lineno=2" + "position=24 eof?=false lineno=3" + "position=36 eof?=false lineno=4" + "position=47 eof?=true lineno=5" + +==== Line Options + +A number of \IO methods accept optional keyword arguments +that determine how lines in a stream are to be treated: + +- +:chomp+: If +true+, line separators are omitted; default is +false+. + +=== Open and Closed \IO Streams + +A new \IO stream may be open for reading, open for writing, or both. + +You can close a stream using these methods: + +- +#close+ - closes the stream for both reading and writing. 
+ +- +#close_read+ (not available in \ARGF) - closes the stream for reading. + +- +#close_write+ (not available in \ARGF) - closes the stream for writing. + +You can query whether a stream is closed using these methods: + +- +#closed?+ - returns whether the stream is closed. + +=== Stream End-of-File + +You can query whether a stream is at end-of-file using this method: + +- +#eof?+ (also aliased as +#eof+) - + returns whether the stream is at end-of-file. + diff --git a/file.c b/file.c index 862f9630df..cf67dd2aaf 100644 --- a/file.c +++ b/file.c @@ -6527,6 +6527,602 @@ const char ruby_null_device[] = * \Class \File extends module FileTest, supporting such singleton methods * as File.exist?. * + * === About the Examples + * + * Many examples here use these variables: + * + * :include: doc/examples/files.rdoc + * + * == \File Access Modes + * + * \Methods File.new and File.open each create a \File object for a given file path. + * + * === \String Access Modes + * + * \Methods File.new and File.open each may take string argument +mode+, which: + * + * - Begins with a 1- or 2-character + * {read/write mode}[rdoc-ref:File@Read-2FWrite+Mode]. + * - May also contain a 1-character {data mode}[rdoc-ref:File@Data+Mode]. + * - May also contain a 1-character + * {file-create mode}[rdoc-ref:File@File-Create+Mode]. + * + * ==== Read/Write Mode + * + * The read/write +mode+ determines: + * + * - Whether the file is to be initially truncated. + * + * - Whether reading is allowed, and if so: + * + * - The initial read position in the file. + * - Where in the file reading can occur. + * + * - Whether writing is allowed, and if so: + * + * - The initial write position in the file. + * - Where in the file writing can occur. + * + * These tables summarize: + * + * Read/Write Modes for Existing File + * + * |------|-----------|----------|----------|----------|-----------| + * | R/W | Initial | | Initial | | Initial | + * | Mode | Truncate? 
| Read | Read Pos | Write | Write Pos | + * |------|-----------|----------|----------|----------|-----------| + * | 'r' | No | Anywhere | 0 | Error | - | + * | 'w' | Yes | Error | - | Anywhere | 0 | + * | 'a' | No | Error | - | End only | End | + * | 'r+' | No | Anywhere | 0 | Anywhere | 0 | + * | 'w+' | Yes | Anywhere | 0 | Anywhere | 0 | + * | 'a+' | No | Anywhere | End | End only | End | + * |------|-----------|----------|----------|----------|-----------| + * + * Read/Write Modes for \File To Be Created + * + * |------|----------|----------|----------|-----------| + * | R/W | | Initial | | Initial | + * | Mode | Read | Read Pos | Write | Write Pos | + * |------|----------|----------|----------|-----------| + * | 'w' | Error | - | Anywhere | 0 | + * | 'a' | Error | - | End only | 0 | + * | 'w+' | Anywhere | 0 | Anywhere | 0 | + * | 'a+' | Anywhere | 0 | End only | End | + * |------|----------|----------|----------|-----------| + * + * Note that modes 'r' and 'r+' are not allowed + * for a non-existent file (exception raised). + * + * In the tables: + * + * - +Anywhere+ means that methods IO#rewind, IO#pos=, and IO#seek + * may be used to change the file's position, + * so that allowed reading or writing may occur anywhere in the file. + * - End only means that writing can occur only at end-of-file, + * and that methods IO#rewind, IO#pos=, and IO#seek do not affect writing. + * - +Error+ means that an exception is raised if disallowed reading or writing + * is attempted. 
+ * + * ===== Read/Write Modes for Existing \File + * + * - 'r': + * + * - File is not initially truncated: + * + * f = File.new('t.txt') # => # + * f.size == 0 # => false + * + * - File's initial read position is 0: + * + * f.pos # => 0 + * + * - File may be read anywhere; see IO#rewind, IO#pos=, IO#seek: + * + * f.readline # => "First line\n" + * f.readline # => "Second line\n" + * + * f.rewind + * f.readline # => "First line\n" + * + * f.pos = 1 + * f.readline # => "irst line\n" + * + * f.seek(1, :CUR) + * f.readline # => "econd line\n" + * + * - Writing is not allowed: + * + * f.write('foo') # Raises IOError. + * + * - 'w': + * + * - File is initially truncated: + * + * path = 't.tmp' + * File.write(path, text) + * f = File.new(path, 'w') + * f.size == 0 # => true + * + * - File's initial write position is 0: + * + * f.pos # => 0 + * + * - File may be written anywhere (even past end-of-file); + * see IO#rewind, IO#pos=, IO#seek: + * + * f.write('foo') + * f.flush + * File.read(path) # => "foo" + * f.pos # => 3 + * + * f.write('bar') + * f.flush + * File.read(path) # => "foobar" + * f.pos # => 6 + * + * f.rewind + * f.write('baz') + * f.flush + * File.read(path) # => "bazbar" + * f.pos # => 3 + * + * f.pos = 3 + * f.write('foo') + * f.flush + * File.read(path) # => "bazfoo" + * f.pos # => 6 + * + * f.seek(-3, :END) + * f.write('bam') + * f.flush + * File.read(path) # => "bazbam" + * f.pos # => 6 + * + * f.pos = 8 + * f.write('bah') # Zero padding as needed. + * f.flush + * File.read(path) # => "bazbam\u0000\u0000bah" + * f.pos # => 11 + * + * - Reading is not allowed: + * + * f.read # Raises IOError. 
+ * + * - 'a': + * + * - File is not initially truncated: + * + * path = 't.tmp' + * File.write(path, 'foo') + * f = File.new(path, 'a') + * f.size == 0 # => false + * + * - File's initial position is 0 (but is ignored): + * + * f.pos # => 0 + * + * - File may be written only at end-of-file; + * IO#rewind, IO#pos=, IO#seek do not affect writing: + * + * f.write('bar') + * f.flush + * File.read(path) # => "foobar" + * f.write('baz') + * f.flush + * File.read(path) # => "foobarbaz" + * + * f.rewind + * f.write('bat') + * f.flush + * File.read(path) # => "foobarbazbat" + * + * - Reading is not allowed: + * + * f.read # Raises IOError. + * + * - 'r+': + * + * - File is not initially truncated: + * + * path = 't.tmp' + * File.write(path, text) + * f = File.new(path, 'r+') + * f.size == 0 # => false + * + * - File's initial read position is 0: + * + * f.pos # => 0 + * + * - File may be read or written anywhere (even past end-of-file); + * see IO#rewind, IO#pos=, IO#seek: + * + * f.readline # => "First line\n" + * f.readline # => "Second line\n" + * + * f.rewind + * f.readline # => "First line\n" + * + * f.pos = 1 + * f.readline # => "irst line\n" + * + * f.seek(1, :CUR) + * f.readline # => "econd line\n" + * + * f.rewind + * f.write('WWW') + * f.flush + * File.read(path) + * # => "WWWst line\nSecond line\nFourth line\nFifth line\n" + * + * f.pos = 10 + * f.write('XXX') + * f.flush + * File.read(path) + * # => "WWWst lineXXXecond line\nFourth line\nFifth line\n" + * + * f.seek(-6, :END) + * # => 0 + * f.write('YYY') + * # => 3 + * f.flush + * # => # + * File.read(path) + * # => "WWWst lineXXXecond line\nFourth line\nFifth YYYe\n" + * + * f.seek(2, :END) + * f.write('ZZZ') # Zero padding as needed. 
+ * f.flush + * File.read(path) + * # => "WWWst lineXXXecond line\nFourth line\nFifth YYYe\n\u0000\u0000ZZZ" + * + * + * - 'a+': + * + * - File is not initially truncated: + * + * path = 't.tmp' + * File.write(path, 'foo') + * f = File.new(path, 'a+') + * f.size == 0 # => false + * + * - File's initial read position is 0: + * + * f.pos # => 0 + * + * - File may be written only at end-of-file; + * IO#rewind, IO#pos=, IO#seek do not affect writing: + * + * f.write('bar') + * f.flush + * File.read(path) # => "foobar" + * f.write('baz') + * f.flush + * File.read(path) # => "foobarbaz" + * + * f.rewind + * f.write('bat') + * f.flush + * File.read(path) # => "foobarbazbat" + * + * - File may be read anywhere; see IO#rewind, IO#pos=, IO#seek: + * + * f.rewind + * f.read # => "foobarbazbat" + * + * f.pos = 3 + * f.read # => "barbazbat" + * + * f.seek(-3, :END) + * f.read # => "bat" + * + * ===== Read/Write Modes for \File To Be Created + * + * Note that modes 'r' and 'r+' are not allowed + * for a non-existent file (exception raised). + * + * - 'w': + * + * - File's initial write position is 0: + * + * path = 't.tmp' + * FileUtils.rm_f(path) + * f = File.new(path, 'w') + * f.pos # => 0 + * + * - File may be written anywhere (even past end-of-file); + * see IO#rewind, IO#pos=, IO#seek: + * + * f.write('foo') + * f.flush + * File.read(path) # => "foo" + * f.pos # => 3 + * + * f.write('bar') + * f.flush + * File.read(path) # => "foobar" + * f.pos # => 6 + * + * f.rewind + * f.write('baz') + * f.flush + * File.read(path) # => "bazbar" + * f.pos # => 3 + * + * f.pos = 3 + * f.write('foo') + * f.flush + * File.read(path) # => "bazfoo" + * f.pos # => 6 + * + * f.seek(-3, :END) + * f.write('bam') + * f.flush + * File.read(path) # => "bazbam" + * f.pos # => 6 + * + * f.pos = 8 + * f.write('bah') # Zero padding as needed. + * f.flush + * File.read(path) # => "bazbam\u0000\u0000bah" + * f.pos # => 11 + * + * - Reading is not allowed: + * + * f.read # Raises IOError. 
+ * + * - 'a': + * + * - File's initial write position is 0: + * + * path = 't.tmp' + * FileUtils.rm_f(path) + * f = File.new(path, 'a') + * f.pos # => 0 + * + * - Writing occurs only at end-of-file: + * + * f.write('foo') + * f.pos # => 3 + * f.write('bar') + * f.pos # => 6 + * f.flush + * File.read(path) # => "foobar" + * + * f.rewind + * f.write('baz') + * f.flush + * File.read(path) # => "foobarbaz" + * + * - Reading is not allowed: + * + * f.read # Raises IOError. + * + * - 'w+': + * + * - File's initial position is 0: + * + * path = 't.tmp' + * FileUtils.rm_f(path) + * f = File.new(path, 'w+') + * f.pos # => 0 + * + * - File may be written anywhere (even past end-of-file); + * see IO#rewind, IO#pos=, IO#seek: + * + * f.write('foo') + * f.flush + * File.read(path) # => "foo" + * f.pos # => 3 + * + * f.write('bar') + * f.flush + * File.read(path) # => "foobar" + * f.pos # => 6 + * + * f.rewind + * f.write('baz') + * f.flush + * File.read(path) # => "bazbar" + * f.pos # => 3 + * + * f.pos = 3 + * f.write('foo') + * f.flush + * File.read(path) # => "bazfoo" + * f.pos # => 6 + * + * f.seek(-3, :END) + * f.write('bam') + * f.flush + * File.read(path) # => "bazbam" + * f.pos # => 6 + * + * f.pos = 8 + * f.write('bah') # Zero padding as needed. 
+ * f.flush + * File.read(path) # => "bazbam\u0000\u0000bah" + * f.pos # => 11 + * + * - File may be read anywhere (even past end-of-file); + * see IO#rewind, IO#pos=, IO#seek: + * + * f.rewind + * # => 0 + * f.read + * # => "bazbam\u0000\u0000bah" + * + * f.pos = 3 + * # => 3 + * f.read + * # => "bam\u0000\u0000bah" + * + * f.seek(-3, :END) + * # => 0 + * f.read + * # => "bah" + * + * - 'a+': + * + * - File's initial write position is 0: + * + * path = 't.tmp' + * FileUtils.rm_f(path) + * f = File.new(path, 'a+') + * f.pos # => 0 + * + * - Writing occurs only at end-of-file: + * + * f.write('foo') + * f.pos # => 3 + * f.write('bar') + * f.pos # => 6 + * f.flush + * File.read(path) # => "foobar" + * + * f.rewind + * f.write('baz') + * f.flush + * File.read(path) # => "foobarbaz" + * + * - File may be read anywhere (even past end-of-file); + * see IO#rewind, IO#pos=, IO#seek: + * + * f.rewind + * f.read # => "foobarbaz" + * + * f.pos = 3 + * f.read # => "barbaz" + * + * f.seek(-3, :END) + * f.read # => "baz" + * + * f.pos = 800 + * f.read # => "" + * + * ==== Data Mode + * + * To specify whether data is to be treated as text or as binary data, + * either of the following may be suffixed to any of the string read/write modes + * above: + * + * - 't': Text data; sets the default external encoding + * to Encoding::UTF_8; + * on Windows, enables conversion between EOL and CRLF + * and enables interpreting 0x1A as an end-of-file marker. + * - 'b': Binary data; sets the default external encoding + * to Encoding::ASCII_8BIT; + * on Windows, suppresses conversion between EOL and CRLF + * and disables interpreting 0x1A as an end-of-file marker. + * + * If neither is given, the stream defaults to text data. 
+ * + * Examples: + * + * File.new('t.txt', 'rt') + * File.new('t.dat', 'rb') + * + * When the data mode is specified, the read/write mode may not be omitted, + * and the data mode must precede the file-create mode, if given: + * + * File.new('t.dat', 'b') # Raises an exception. + * File.new('t.dat', 'rxb') # Raises an exception. + * + * ==== \File-Create Mode + * + * The following may be suffixed to any writable string mode above: + * + * - 'x': Creates the file if it does not exist; + * raises an exception if the file exists. + * + * Example: + * + * File.new('t.tmp', 'wx') + * + * When the file-create mode is specified, the read/write mode may not be omitted, + * and the file-create mode must follow the data mode: + * + * File.new('t.dat', 'x') # Raises an exception. + * File.new('t.dat', 'rxb') # Raises an exception. + * + * === \Integer Access Modes + * + * When mode is an integer it must be one or more of the following constants, + * which may be combined by the bitwise OR operator |: + * + * - +File::RDONLY+: Open for reading only. + * - +File::WRONLY+: Open for writing only. + * - +File::RDWR+: Open for reading and writing. + * - +File::APPEND+: Open for appending only. + * + * Examples: + * + * File.new('t.txt', File::RDONLY) + * File.new('t.tmp', File::RDWR | File::CREAT | File::EXCL) + * + * Note: Method IO#set_encoding does not allow the mode to be specified as an integer. + * + * === File-Create Mode Specified as an \Integer + * + * These constants may also be ORed into the integer mode: + * + * - +File::CREAT+: Create file if it does not exist. + * - +File::EXCL+: Raise an exception if +File::CREAT+ is given and the file exists. + * + * === Data Mode Specified as an \Integer + * + * Data mode cannot be specified as an integer. + * When the stream access mode is given as an integer, + * the data mode is always text, never binary. 
+ * + * Note that although there is a constant +File::BINARY+, + * setting its value in an integer stream mode has no effect; + * this is because, as documented in File::Constants, + * the +File::BINARY+ value disables line code conversion, + * but does not change the external encoding. + * + * === Encodings + * + * Any of the string modes above may specify encodings - + * either external encoding only or both external and internal encodings - + * by appending one or both encoding names, separated by colons: + * + * f = File.new('t.dat', 'rb') + * f.external_encoding # => #<Encoding:ASCII-8BIT> + * f.internal_encoding # => nil + * f = File.new('t.dat', 'rb:UTF-16') + * f.external_encoding # => #<Encoding:UTF-16 (dummy)> + * f.internal_encoding # => nil + * f = File.new('t.dat', 'rb:UTF-16:UTF-16') + * f.external_encoding # => #<Encoding:UTF-16 (dummy)> + * f.internal_encoding # => #<Encoding:UTF-16> + * f.close + * + * The numerous encoding names are available in array Encoding.name_list: + * + * Encoding.name_list.take(3) # => ["ASCII-8BIT", "UTF-8", "US-ASCII"] + * + * When the external encoding is set, strings read are tagged by that encoding + * when reading, and strings written are converted to that encoding when + * writing. + * + * When both external and internal encodings are set, + * strings read are converted from external to internal encoding, + * and strings written are converted from internal to external encoding. + * For further details about transcoding input and output, + * see {Encodings}[rdoc-ref:io_streams.rdoc@Encodings]. + * + * If the external encoding is 'BOM|UTF-8', 'BOM|UTF-16LE' + * or 'BOM|UTF-16BE', + * Ruby checks for a Unicode BOM in the input document + * to help determine the encoding. + * For UTF-16 encodings the file open mode must be binary. + * If the BOM is found, + * it is stripped and the external encoding from the BOM is used. + * + * Note that the BOM-style encoding option is case insensitive, + * so 'bom|utf-8' is also valid. 
+ * * == \File Permissions * * A \File object has _permissions_, an octal integer representing @@ -6584,34 +7180,6 @@ const char ruby_null_device[] = * may be found in module File::Constants; * an array of their names is returned by File::Constants.constants. * - * == Example Files - * - * Many examples here use these filenames and their corresponding files: - * - * - t.txt: A text-only file that is assumed to exist via: - * - * text = <<~EOT - * First line - * Second line - * - * Fourth line - * Fifth line - * EOT - * File.write('t.txt', text) - * - * - t.dat: A data file that is assumed to exist via: - * - * data = "\u9990\u9991\u9992\u9993\u9994" - * f = File.open('t.dat', 'wb:UTF-16') - * f.write(data) - * f.close - * - * - t.rus: A Russian-language text file that is assumed to exist via: - * - * File.write('t.rus', "\u{442 435 441 442}") - * - * - t.tmp: A file that is assumed _not_ to exist. - * * == What's Here * * First, what's elsewhere. \Class \File: