mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/rexml/source.rb: force_encoding("UTF-8") when the input
is already UTF-8. patched by Kouhei Sutou [ruby-core:23404]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27342 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
55f7857a77
commit
46ff009168
3 changed files with 44 additions and 14 deletions
|
@ -1,3 +1,8 @@
|
||||||
|
Wed Apr 14 22:09:28 2010 NARUSE, Yui <naruse@ruby-lang.org>
|
||||||
|
|
||||||
|
* lib/rexml/source.rb: force_encoding("UTF-8") when the input
|
||||||
|
is already UTF-8. patched by Kouhei Sutou [ruby-core:23404]
|
||||||
|
|
||||||
Wed Apr 14 18:23:00 2010 Kenta Murata <mrkn@mrkn.jp>
|
Wed Apr 14 18:23:00 2010 Kenta Murata <mrkn@mrkn.jp>
|
||||||
|
|
||||||
* configure.in (signbit): signbit is a macro in C99.
|
* configure.in (signbit): signbit is a macro in C99.
|
||||||
|
|
|
@ -162,6 +162,15 @@ module REXML
|
||||||
@line_break = ">"
|
@line_break = ">"
|
||||||
end
|
end
|
||||||
super( @source.eof? ? str : str+@source.readline( @line_break ) )
|
super( @source.eof? ? str : str+@source.readline( @line_break ) )
|
||||||
|
|
||||||
|
if !@to_utf and
|
||||||
|
@buffer.respond_to?(:force_encoding) and
|
||||||
|
@source.respond_to?(:external_encoding) and
|
||||||
|
@source.external_encoding != ::Encoding::UTF_8
|
||||||
|
@force_utf8 = true
|
||||||
|
else
|
||||||
|
@force_utf8 = false
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def scan(pattern, cons=false)
|
def scan(pattern, cons=false)
|
||||||
|
@ -174,11 +183,7 @@ module REXML
|
||||||
if rv.size == 0
|
if rv.size == 0
|
||||||
until @buffer =~ pattern or @source.nil?
|
until @buffer =~ pattern or @source.nil?
|
||||||
begin
|
begin
|
||||||
# READLINE OPT
|
@buffer << readline
|
||||||
#str = @source.read(@block_size)
|
|
||||||
str = @source.readline(@line_break)
|
|
||||||
str = decode(str) if @to_utf and str
|
|
||||||
@buffer << str
|
|
||||||
rescue Iconv::IllegalSequence
|
rescue Iconv::IllegalSequence
|
||||||
raise
|
raise
|
||||||
rescue
|
rescue
|
||||||
|
@ -193,12 +198,7 @@ module REXML
|
||||||
|
|
||||||
def read
|
def read
|
||||||
begin
|
begin
|
||||||
str = @source.readline(@line_break)
|
@buffer << readline
|
||||||
str = decode(str) if @to_utf and str
|
|
||||||
@buffer << str
|
|
||||||
if not @to_utf and @buffer.respond_to? :force_encoding
|
|
||||||
@buffer.force_encoding Encoding::UTF_8
|
|
||||||
end
|
|
||||||
rescue Exception, NameError
|
rescue Exception, NameError
|
||||||
@source = nil
|
@source = nil
|
||||||
end
|
end
|
||||||
|
@ -213,9 +213,7 @@ module REXML
|
||||||
@buffer = $' if cons and rv
|
@buffer = $' if cons and rv
|
||||||
while !rv and @source
|
while !rv and @source
|
||||||
begin
|
begin
|
||||||
str = @source.readline(@line_break)
|
@buffer << readline
|
||||||
str = decode(str) if @to_utf and str
|
|
||||||
@buffer << str
|
|
||||||
rv = pattern.match(@buffer)
|
rv = pattern.match(@buffer)
|
||||||
@buffer = $' if cons and rv
|
@buffer = $' if cons and rv
|
||||||
rescue
|
rescue
|
||||||
|
@ -254,5 +252,18 @@ module REXML
|
||||||
end
|
end
|
||||||
[pos, lineno, line]
|
[pos, lineno, line]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
def readline
|
||||||
|
str = @source.readline(@line_break)
|
||||||
|
return nil if str.nil?
|
||||||
|
|
||||||
|
if @to_utf
|
||||||
|
decode(str)
|
||||||
|
else
|
||||||
|
str.force_encoding(::Encoding::UTF_8) if @force_utf8
|
||||||
|
str
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
require "rexml/document"
|
require "rexml/document"
|
||||||
require "test/unit"
|
require "test/unit"
|
||||||
|
|
||||||
|
@ -83,6 +85,18 @@ EOF
|
||||||
REXML::Document.entity_expansion_limit = 10000
|
REXML::Document.entity_expansion_limit = 10000
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_tag_in_cdata_with_not_ascii_only_but_ascii8bit_encoding_source
|
||||||
|
tag = "<b>...</b>"
|
||||||
|
message = "こんにちは、世界!" # Hello world! in Japanese
|
||||||
|
xml = <<EOX
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<message><![CDATA[#{tag}#{message}]]></message>
|
||||||
|
EOX
|
||||||
|
xml.force_encoding(Encoding::ASCII_8BIT)
|
||||||
|
doc = REXML::Document.new(xml)
|
||||||
|
assert_equal("#{tag}#{message}", doc.root.children.first.value)
|
||||||
|
end
|
||||||
|
|
||||||
def test_xml_declaration_standalone
|
def test_xml_declaration_standalone
|
||||||
bug2539 = '[ruby-core:27345]'
|
bug2539 = '[ruby-core:27345]'
|
||||||
doc = REXML::Document.new('<?xml version="1.0" standalone="no" ?>')
|
doc = REXML::Document.new('<?xml version="1.0" standalone="no" ?>')
|
||||||
|
|
Loading…
Reference in a new issue