mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
* lib/rexml/xmltokens.rb: Add missing valid non-ASCII characters
to element name characters. REXML name tokens now exactly match "[5] Name" in the XML spec and "[4] NCName" in the Namespaces in XML spec. See the code comments for details. [Bug #9539] [ruby-core:60901] Reported by Mario Barcala. Thanks!!! * test/rexml/xpath/test_node.rb: Add tests for the above case. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@45153 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
parent
44a9509f2f
commit
ba5ed845b3
3 changed files with 122 additions and 5 deletions
11
ChangeLog
11
ChangeLog
|
@ -1,3 +1,14 @@
|
||||||
|
Sun Feb 23 17:55:50 2014 Kouhei Sutou <kou@cozmixng.org>
|
||||||
|
|
||||||
|
* lib/rexml/xmltokens.rb: Add missing valid non-ASCII characters
|
||||||
|
to element name characters. Now, REXML name tokens exactly
|
||||||
|
match "[5] Name" in the XML spec and "[4] NCName" in the
|
||||||
|
Namespaces in XML spec. See the code comments for details.
|
||||||
|
[Bug #9539] [ruby-core:60901]
|
||||||
|
Reported by Mario Barcala. Thanks!!!
|
||||||
|
|
||||||
|
* test/rexml/xpath/test_node.rb: Add tests for the above case.
|
||||||
|
|
||||||
Sun Feb 23 12:18:54 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
Sun Feb 23 12:18:54 2014 Nobuyoshi Nakada <nobu@ruby-lang.org>
|
||||||
|
|
||||||
* ext/socket/raddrinfo.c (inet_pton): use rb_w32_inet_pton, instead of
|
* ext/socket/raddrinfo.c (inet_pton): use rb_w32_inet_pton, instead of
|
||||||
|
|
|
@ -2,12 +2,78 @@ module REXML
|
||||||
# Defines a number of tokens used for parsing XML. Not for general
|
# Defines a number of tokens used for parsing XML. Not for general
|
||||||
# consumption.
|
# consumption.
|
||||||
module XMLTokens
|
module XMLTokens
|
||||||
NCNAME_STR= '[\w:][\-\w.]*'
|
# From http://www.w3.org/TR/REC-xml/#sec-common-syn
|
||||||
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
#
|
||||||
|
# [4] NameStartChar ::=
|
||||||
|
# ":" |
|
||||||
|
# [A-Z] |
|
||||||
|
# "_" |
|
||||||
|
# [a-z] |
|
||||||
|
# [#xC0-#xD6] |
|
||||||
|
# [#xD8-#xF6] |
|
||||||
|
# [#xF8-#x2FF] |
|
||||||
|
# [#x370-#x37D] |
|
||||||
|
# [#x37F-#x1FFF] |
|
||||||
|
# [#x200C-#x200D] |
|
||||||
|
# [#x2070-#x218F] |
|
||||||
|
# [#x2C00-#x2FEF] |
|
||||||
|
# [#x3001-#xD7FF] |
|
||||||
|
# [#xF900-#xFDCF] |
|
||||||
|
# [#xFDF0-#xFFFD] |
|
||||||
|
# [#x10000-#xEFFFF]
|
||||||
|
name_start_chars = [
|
||||||
|
":",
|
||||||
|
"A-Z",
|
||||||
|
"_",
|
||||||
|
"a-z",
|
||||||
|
"\\u00C0-\\u00D6",
|
||||||
|
"\\u00D8-\\u00F6",
|
||||||
|
"\\u00F8-\\u02FF",
|
||||||
|
"\\u0370-\\u037D",
|
||||||
|
"\\u037F-\\u1FFF",
|
||||||
|
"\\u200C-\\u200D",
|
||||||
|
"\\u2070-\\u218F",
|
||||||
|
"\\u2C00-\\u2FEF",
|
||||||
|
"\\u3001-\\uD7FF",
|
||||||
|
"\\uF900-\\uFDCF",
|
||||||
|
"\\uFDF0-\\uFFFD",
|
||||||
|
"\\u{10000}-\\u{EFFFF}",
|
||||||
|
]
|
||||||
|
# From http://www.w3.org/TR/REC-xml/#sec-common-syn
|
||||||
|
#
|
||||||
|
# [4a] NameChar ::=
|
||||||
|
# NameStartChar |
|
||||||
|
# "-" |
|
||||||
|
# "." |
|
||||||
|
# [0-9] |
|
||||||
|
# #xB7 |
|
||||||
|
# [#x0300-#x036F] |
|
||||||
|
# [#x203F-#x2040]
|
||||||
|
name_chars = name_start_chars + [
|
||||||
|
"\\-",
|
||||||
|
"\\.",
|
||||||
|
"0-9",
|
||||||
|
"\\u00B7",
|
||||||
|
"\\u0300-\\u036F",
|
||||||
|
"\\u203F-\\u2040",
|
||||||
|
]
|
||||||
|
NAME_START_CHAR = "[#{name_start_chars.join('')}]"
|
||||||
|
NAME_CHAR = "[#{name_chars.join('')}]"
|
||||||
|
NAMECHAR = NAME_CHAR # deprecated. Use NAME_CHAR instead.
|
||||||
|
|
||||||
NAMECHAR = '[\-\w\.:]'
|
# From http://www.w3.org/TR/xml-names11/#NT-NCName
|
||||||
NAME = "([\\w:]#{NAMECHAR}*)"
|
#
|
||||||
NMTOKEN = "(?:#{NAMECHAR})+"
|
# [6] NCNameStartChar ::= NameStartChar - ':'
|
||||||
|
ncname_start_chars = name_start_chars - [":"]
|
||||||
|
# From http://www.w3.org/TR/xml-names11/#NT-NCName
|
||||||
|
#
|
||||||
|
# [5] NCNameChar ::= NameChar - ':'
|
||||||
|
ncname_chars = name_chars - [":"]
|
||||||
|
NCNAME_STR = "[#{ncname_start_chars.join('')}][#{ncname_chars.join('')}]*"
|
||||||
|
NAME_STR = "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
|
||||||
|
|
||||||
|
NAME = "(#{NAME_START_CHAR}#{NAME_CHAR}*)"
|
||||||
|
NMTOKEN = "(?:#{NAME_CHAR})+"
|
||||||
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
|
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
|
||||||
REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
|
REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
|
||||||
|
|
||||||
|
|
40
test/rexml/xpath/test_node.rb
Normal file
40
test/rexml/xpath/test_node.rb
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
require_relative "../rexml_test_utils"
|
||||||
|
|
||||||
|
require "rexml/document"
|
||||||
|
|
||||||
|
class TestXPathNode < Test::Unit::TestCase
|
||||||
|
def matches(xml, xpath)
|
||||||
|
document = REXML::Document.new(xml)
|
||||||
|
REXML::XPath.each(document, xpath).collect(&:to_s)
|
||||||
|
end
|
||||||
|
|
||||||
|
class TestQName < self
|
||||||
|
def test_ascii
|
||||||
|
xml = <<-XML
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<root>
|
||||||
|
<ascii>
|
||||||
|
<child>child</child>
|
||||||
|
</ascii>
|
||||||
|
</root>
|
||||||
|
XML
|
||||||
|
assert_equal(["<child>child</child>"],
|
||||||
|
matches(xml, "/root/ascii/child"))
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_non_ascii
|
||||||
|
xml = <<-XML
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<root>
|
||||||
|
<non-àscii>
|
||||||
|
<child>child</child>
|
||||||
|
</non-àscii>
|
||||||
|
</root>
|
||||||
|
XML
|
||||||
|
assert_equal(["<child>child</child>"],
|
||||||
|
matches(xml, "/root/non-àscii/child"))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
Loading…
Add table
Add a link
Reference in a new issue