1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

* lib/rexml/source.rb: force_encoding("UTF-8") when the input

is already UTF-8. patched by Kouhei Sutou [ruby-core:23404]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27342 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
naruse 2010-04-14 13:11:11 +00:00
parent 55f7857a77
commit 46ff009168
3 changed files with 44 additions and 14 deletions

View file

@ -1,3 +1,8 @@
Wed Apr 14 22:09:28 2010 NARUSE, Yui <naruse@ruby-lang.org>
* lib/rexml/source.rb: force_encoding("UTF-8") when the input
is already UTF-8. patched by Kouhei Sutou [ruby-core:23404]
Wed Apr 14 18:23:00 2010 Kenta Murata <mrkn@mrkn.jp>
* configure.in (signbit): signbit is a macro in C99.

View file

@ -162,6 +162,15 @@ module REXML
@line_break = ">"
end
super( @source.eof? ? str : str+@source.readline( @line_break ) )
if !@to_utf and
@buffer.respond_to?(:force_encoding) and
@source.respond_to?(:external_encoding) and
@source.external_encoding != ::Encoding::UTF_8
@force_utf8 = true
else
@force_utf8 = false
end
end
def scan(pattern, cons=false)
@ -174,11 +183,7 @@ module REXML
if rv.size == 0
until @buffer =~ pattern or @source.nil?
begin
# READLINE OPT
#str = @source.read(@block_size)
str = @source.readline(@line_break)
str = decode(str) if @to_utf and str
@buffer << str
@buffer << readline
rescue Iconv::IllegalSequence
raise
rescue
@ -193,12 +198,7 @@ module REXML
def read
begin
str = @source.readline(@line_break)
str = decode(str) if @to_utf and str
@buffer << str
if not @to_utf and @buffer.respond_to? :force_encoding
@buffer.force_encoding Encoding::UTF_8
end
@buffer << readline
rescue Exception, NameError
@source = nil
end
@ -213,9 +213,7 @@ module REXML
@buffer = $' if cons and rv
while !rv and @source
begin
str = @source.readline(@line_break)
str = decode(str) if @to_utf and str
@buffer << str
@buffer << readline
rv = pattern.match(@buffer)
@buffer = $' if cons and rv
rescue
@ -254,5 +252,18 @@ module REXML
end
[pos, lineno, line]
end
private
# Reads the next chunk from @source, up to and including @line_break.
#
# Returns nil when the underlying source hands back nil. When @to_utf is
# set, the chunk is passed through +decode+ and that result is returned.
# Otherwise the chunk itself is returned, re-tagged in place as UTF-8
# first if @force_utf8 is set.
def readline
  chunk = @source.readline(@line_break)
  return nil if chunk.nil?
  return decode(chunk) if @to_utf

  # force_encoding only relabels the string; the bytes are left untouched.
  chunk.force_encoding(::Encoding::UTF_8) if @force_utf8
  chunk
end
end
end

View file

@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
require "rexml/document"
require "test/unit"
@ -83,6 +85,18 @@ EOF
REXML::Document.entity_expansion_limit = 10000
end
def test_tag_in_cdata_with_not_ascii_only_but_ascii8bit_encoding_source
tag = "<b>...</b>"
message = "こんにちは、世界!" # Hello world! in Japanese
xml = <<EOX
<?xml version="1.0" encoding="UTF-8"?>
<message><![CDATA[#{tag}#{message}]]></message>
EOX
xml.force_encoding(Encoding::ASCII_8BIT)
doc = REXML::Document.new(xml)
assert_equal("#{tag}#{message}", doc.root.children.first.value)
end
def test_xml_declaration_standalone
bug2539 = '[ruby-core:27345]'
doc = REXML::Document.new('<?xml version="1.0" standalone="no" ?>')