2011-08-23 19:53:49 -04:00
|
|
|
# coding: US-ASCII
|
2017-11-27 05:45:24 -05:00
|
|
|
# frozen_string_literal: true
|
2011-08-23 19:53:49 -04:00
|
|
|
|
2010-12-19 22:22:49 -05:00
|
|
|
##
|
|
|
|
# This module is a wrapper around File IO and Encoding that helps RDoc load
# files and convert them to the correct encoding.
|
|
|
|
|
|
|
|
module RDoc::Encoding

  # Matches a run of one or more consecutive header lines: a shebang (only at
  # the very start of the string), a frozen_string_literal magic comment, an
  # encoding magic comment, or an XML declaration carrying an encoding
  # attribute.  The encoding name, when one is present, is captured in the
  # +name+ group.
  HEADER_REGEXP = /^
    (?:
      \A\#!.*\n
      |
      ^\#\s+frozen[-_]string[-_]literal[=:].+\n
      |
      ^\#[^\n]+\b(?:en)?coding[=:]\s*(?<name>[^\s;]+).*\n
      |
      <\?xml[^?]*encoding=(?<quote>["'])(?<name>.*?)\k<quote>.*\n
    )+
  /xi # :nodoc:

  ##
  # Reads the contents of +filename+ and handles any encoding directives in
  # the file.
  #
  # The file is read in binary mode, a leading UTF-8 byte order mark is
  # stripped, and an encoding named by a header directive (see
  # ::detect_encoding) is applied to the raw bytes.
  #
  # The content will then be converted to the +encoding+ (or to
  # Encoding.default_external when +encoding+ is nil).  If the file cannot be
  # converted a warning will be printed and nil will be returned.
  #
  # If +force_transcode+ is true the document will be transcoded and any
  # unknown character in the target encoding will be replaced with '?'
  #
  # nil is also returned when the header names an unknown encoding (after
  # printing a warning) and, silently, when +filename+ is a directory or does
  # not exist.  Any other ArgumentError is re-raised.

  def self.read_file filename, encoding, force_transcode = false
    content = File.open filename, "rb" do |f| f.read end
    content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ /mswin|mingw/

    # The +n+ flag keeps this pattern binary whatever the source encoding of
    # this file is, so it can always be matched against the raw bytes read
    # above.  sub! returns nil when no BOM was present.
    utf8 = content.sub!(/\A\xef\xbb\xbf/n, '')

    enc = RDoc::Encoding.detect_encoding content
    content = RDoc::Encoding.change_encoding content, enc if enc

    begin
      encoding ||= Encoding.default_external
      orig_encoding = content.encoding

      if !orig_encoding.ascii_compatible?
        content = content.encode encoding
      elsif utf8
        # a BOM was found, so the bytes are UTF-8
        content = RDoc::Encoding.change_encoding content, Encoding::UTF_8
        content = content.encode encoding
      else
        # assume the content is in our output encoding
        content = RDoc::Encoding.change_encoding content, encoding
      end

      unless content.valid_encoding?
        # revert and try to transcode
        content = RDoc::Encoding.change_encoding content, orig_encoding
        content = content.encode encoding
      end

      unless content.valid_encoding?
        warn "unable to convert #{filename} to #{encoding}, skipping"
        content = nil
      end
    rescue Encoding::InvalidByteSequenceError,
           Encoding::UndefinedConversionError => e
      unless force_transcode
        warn "unable to convert #{e.message} for #{filename}, skipping"
        return nil
      end

      # retry from the original encoding, substituting '?' for anything that
      # cannot be represented in the target encoding
      content = RDoc::Encoding.change_encoding content, orig_encoding
      return content.encode(encoding,
                            :invalid => :replace,
                            :undef   => :replace,
                            :replace => '?')
    end

    content
  rescue ArgumentError => e
    raise unless e.message =~ /unknown encoding name - (.*)/
    warn "unknown encoding name \"#{$1}\" for #{filename}, skipping"
    nil
  rescue Errno::EISDIR, Errno::ENOENT
    nil
  end

  ##
  # Returns +string+ without its frozen_string_literal magic comment when that
  # comment is the first line, or the first line after a shebang.  The
  # comparison line must end with a newline; otherwise, or when the line is
  # not such a comment, +string+ itself is returned.

  def self.remove_frozen_string_literal string
    first_line = string[/\A(?:#!.*\n)?(.*\n)/, 1]
    return string unless first_line

    if first_line =~ /\A# +frozen[-_]string[-_]literal[=:].+$/i
      string = string.sub first_line, ''
    end

    string
  end

  ##
  # Detects the encoding of +string+ based on the magic comment
  #
  # Returns the Encoding named in the header matched by HEADER_REGEXP, or nil
  # when there is no header or the header names no encoding.  The name is
  # resolved with Encoding.find, so an unknown name raises ArgumentError.

  def self.detect_encoding string
    header = HEADER_REGEXP.match string
    name = header && header[:name]

    Encoding.find name if name
  end

  ##
  # Removes magic comments and shebang
  #
  # Every character of the first HEADER_REGEXP match other than a newline is
  # dropped, so the result has as many lines as +string+.

  def self.remove_magic_comment string
    string.sub HEADER_REGEXP do |header|
      header.gsub(/[^\n]/, '')
    end
  end

  ##
  # Changes encoding based on +encoding+ without converting and returns new
  # string
  #
  # An RDoc::Comment is handled by its own encode! method instead.  On Ruby
  # older than 2.3 the encoding is forced on +text+ in place and +text+ itself
  # is returned.

  def self.change_encoding text, encoding
    return text.encode! encoding if text.kind_of? RDoc::Comment

    # TODO: Remove this condition after Ruby 2.2 EOL
    #
    # Version components are compared numerically: comparing the raw strings
    # would sort "10.0.0" before "2.3.0".
    if (RUBY_VERSION.split('.').map(&:to_i) <=> [2, 3, 0]) < 0
      text.force_encoding encoding
    else
      String.new text, encoding: encoding
    end
  end

end
|