mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/rexml/source.rb: force_encoding("UTF-8") when the input
is already UTF-8. patched by Kouhei Sutou [ruby-core:23404]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27342 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
55f7857a77
commit
46ff009168
3 changed files with 44 additions and 14 deletions
|
@ -1,3 +1,8 @@
|
|||
Wed Apr 14 22:09:28 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||
|
||||
* lib/rexml/source.rb: force_encoding("UTF-8") when the input
|
||||
is already UTF-8. patched by Kouhei Sutou [ruby-core:23404]
|
||||
|
||||
Wed Apr 14 18:23:00 2010 Kenta Murata <mrkn@mrkn.jp>
|
||||
|
||||
* configure.in (signbit): signbit is a macro in C99.
|
||||
|
|
|
@ -162,6 +162,15 @@ module REXML
|
|||
@line_break = ">"
|
||||
end
|
||||
super( @source.eof? ? str : str+@source.readline( @line_break ) )
|
||||
|
||||
if !@to_utf and
|
||||
@buffer.respond_to?(:force_encoding) and
|
||||
@source.respond_to?(:external_encoding) and
|
||||
@source.external_encoding != ::Encoding::UTF_8
|
||||
@force_utf8 = true
|
||||
else
|
||||
@force_utf8 = false
|
||||
end
|
||||
end
|
||||
|
||||
def scan(pattern, cons=false)
|
||||
|
@ -174,11 +183,7 @@ module REXML
|
|||
if rv.size == 0
|
||||
until @buffer =~ pattern or @source.nil?
|
||||
begin
|
||||
# READLINE OPT
|
||||
#str = @source.read(@block_size)
|
||||
str = @source.readline(@line_break)
|
||||
str = decode(str) if @to_utf and str
|
||||
@buffer << str
|
||||
@buffer << readline
|
||||
rescue Iconv::IllegalSequence
|
||||
raise
|
||||
rescue
|
||||
|
@ -193,12 +198,7 @@ module REXML
|
|||
|
||||
def read
|
||||
begin
|
||||
str = @source.readline(@line_break)
|
||||
str = decode(str) if @to_utf and str
|
||||
@buffer << str
|
||||
if not @to_utf and @buffer.respond_to? :force_encoding
|
||||
@buffer.force_encoding Encoding::UTF_8
|
||||
end
|
||||
@buffer << readline
|
||||
rescue Exception, NameError
|
||||
@source = nil
|
||||
end
|
||||
|
@ -213,9 +213,7 @@ module REXML
|
|||
@buffer = $' if cons and rv
|
||||
while !rv and @source
|
||||
begin
|
||||
str = @source.readline(@line_break)
|
||||
str = decode(str) if @to_utf and str
|
||||
@buffer << str
|
||||
@buffer << readline
|
||||
rv = pattern.match(@buffer)
|
||||
@buffer = $' if cons and rv
|
||||
rescue
|
||||
|
@ -254,5 +252,18 @@ module REXML
|
|||
end
|
||||
[pos, lineno, line]
|
||||
end
|
||||
|
||||
private
|
||||
# Reads one chunk from @source, using @line_break as the separator, and
# prepares it for appending to the buffer.
#
# Returns nil when @source.readline returns nil. When @to_utf is set the
# chunk is passed through decode. Otherwise the chunk is returned as read,
# after being re-tagged as UTF-8 with force_encoding when @force_utf8 is
# set (force_encoding changes only the encoding label, not the bytes).
def readline
|
||||
str = @source.readline(@line_break)
|
||||
return nil if str.nil?
|
||||
|
||||
if @to_utf
|
||||
decode(str)
|
||||
else
|
||||
str.force_encoding(::Encoding::UTF_8) if @force_utf8
|
||||
str
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
require "rexml/document"
|
||||
require "test/unit"
|
||||
|
||||
|
@ -83,6 +85,18 @@ EOF
|
|||
REXML::Document.entity_expansion_limit = 10000
|
||||
end
|
||||
|
||||
# Checks that a CDATA section holding both markup-like text ("<b>...</b>")
# and non-ASCII characters is returned intact when the source string is
# tagged ASCII-8BIT even though its XML declaration says UTF-8.
def test_tag_in_cdata_with_not_ascii_only_but_ascii8bit_encoding_source
|
||||
tag = "<b>...</b>"
|
||||
message = "こんにちは、世界!" # Hello world! in Japanese
|
||||
xml = <<EOX
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<message><![CDATA[#{tag}#{message}]]></message>
|
||||
EOX
|
||||
xml.force_encoding(Encoding::ASCII_8BIT)
|
||||
doc = REXML::Document.new(xml)
|
||||
|
||||
assert_equal("#{tag}#{message}", doc.root.children.first.value)
|
||||
end
|
||||
|
||||
def test_xml_declaration_standalone
|
||||
bug2539 = '[ruby-core:27345]'
|
||||
doc = REXML::Document.new('<?xml version="1.0" standalone="no" ?>')
|
||||
|
|
Loading…
Reference in a new issue