mirror of
https://github.com/haml/haml.git
synced 2022-11-09 12:33:31 -05:00
[Haml] Add -# encoding: support.
This commit is contained in:
parent
919fcf0c8d
commit
ebaf1baa42
6 changed files with 310 additions and 5 deletions
|
@ -3,6 +3,20 @@
|
|||
* Table of contents
|
||||
{:toc}
|
||||
|
||||
## 3.0.7 (Unreleased)
|
||||
|
||||
### Encoding Support
|
||||
|
||||
Haml 3.0.7 adds support for Ruby-style `-# coding:` comments
|
||||
for declaring the encoding of a template.
|
||||
For details see {file:HAML_REFERENCE.md#encodings the reference}.
|
||||
|
||||
This also slightly changes the behavior of Haml when the
|
||||
{file:HAML_REFERENCE.md#encoding-option `:encoding` option} is not set.
|
||||
Rather than defaulting to `"utf-8"`,
|
||||
it defaults to the encoding of the source document,
|
||||
and only falls back to `"utf-8"` if this encoding is `"us-ascii"`.
|
||||
|
||||
## 3.0.6
|
||||
|
||||
[Tagged on GitHub](http://github.com/nex3/haml/commit/3.0.6).
|
||||
|
|
|
@ -197,7 +197,9 @@ Available options are:
|
|||
Note that Haml **does not** automatically re-encode Ruby values;
|
||||
any strings coming from outside the application should be converted
|
||||
before being passed into the Haml template.
|
||||
Defaults to `Encoding.default_internal` or, if that's not set, `"utf-8"`.
|
||||
Defaults to `Encoding.default_internal`; if that's not set,
|
||||
defaults to the encoding of the Haml template;
|
||||
if that's `us-ascii`, defaults to `"utf-8"`.
|
||||
<br/><br/> <!-- There's no better way to do a paragraph break in a dl in Maruku -->
|
||||
Many Ruby database drivers are not yet Ruby 1.9 compatible;
|
||||
in particular, they return strings marked as ASCII-encoded
|
||||
|
@ -207,6 +209,25 @@ Available options are:
|
|||
set `:encoding` to `"ascii-8bit"`, or try to get the authors of the database drivers
|
||||
to make them Ruby 1.9 compatible.
|
||||
|
||||
### Encodings
|
||||
|
||||
When using Ruby 1.9 or later,
|
||||
Haml supports the same sorts of encoding-declaration comments that Ruby does.
|
||||
Although both Ruby and Haml support several different styles,
|
||||
the easiest is just to add `-# coding: encoding-name`
|
||||
at the beginning of the Haml template
|
||||
(it must come before all other lines).
|
||||
This will tell Haml that the template is encoded using the named encoding.
|
||||
|
||||
By default, the HTML generated by Haml has the same encoding as the Haml template.
|
||||
However, if `Encoding.default_internal` is set, Haml will attempt to use that instead.
|
||||
In addition, the [`:encoding` option](#encoding-option) can be used
|
||||
to specify an output encoding manually.
|
||||
|
||||
Note that, like Ruby, Haml does not support templates encoded in UTF-16 or UTF-32,
|
||||
since these encodings are not compatible with ASCII.
|
||||
It is possible to use these as the output encoding, though.
|
||||
|
||||
## Plain Text
|
||||
|
||||
A substantial portion of any HTML document is its content,
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
### Encoding Support
|
||||
|
||||
Add support for `@charset` for declaring the encoding of a stylesheet.
|
||||
Sass 3.0.7 adds support for `@charset` for declaring the encoding of a stylesheet.
|
||||
For details see {file:SASS_REFERENCE.md#encodings the reference}.
|
||||
|
||||
### Bug Fixes
|
||||
|
|
|
@ -85,8 +85,15 @@ module Haml
|
|||
:format => :xhtml,
|
||||
:escape_html => false,
|
||||
}
|
||||
|
||||
|
||||
template = check_haml_encoding(template) do |msg, line|
|
||||
raise Haml::Error.new(msg, line)
|
||||
end
|
||||
|
||||
unless ruby1_8?
|
||||
@options[:encoding] = Encoding.default_internal || "utf-8"
|
||||
@options[:encoding] = Encoding.default_internal || template.encoding
|
||||
@options[:encoding] = "utf-8" if @options[:encoding].name == "US-ASCII"
|
||||
end
|
||||
@options.merge! options.reject {|k, v| v.nil?}
|
||||
@index = 0
|
||||
|
@ -99,8 +106,6 @@ module Haml
|
|||
@options[:encoding] = @options[:encoding].name
|
||||
end
|
||||
|
||||
template = check_encoding(template) {|msg, line| raise Haml::Error.new(msg, line)}
|
||||
|
||||
# :eod is a special end-of-document marker
|
||||
@template = (template.rstrip).split(/\r\n|\r|\n/) + [:eod, :eod]
|
||||
@template_index = 0
|
||||
|
|
|
@ -2,6 +2,7 @@ require 'erb'
|
|||
require 'set'
|
||||
require 'enumerator'
|
||||
require 'stringio'
|
||||
require 'strscan'
|
||||
require 'haml/root'
|
||||
require 'haml/util/subset_map'
|
||||
|
||||
|
@ -434,6 +435,37 @@ MSG
|
|||
return str
|
||||
end
|
||||
|
||||
# Like {\#check\_encoding}, but also checks for a Ruby-style `-# coding:` comment
|
||||
# at the beginning of the template and uses that encoding if it exists.
|
||||
#
|
||||
# The Sass encoding rules are simple.
|
||||
# If a `-# coding:` comment exists,
|
||||
# we assume that that's the original encoding of the document.
|
||||
# Otherwise, we use whatever encoding Ruby has.
|
||||
#
|
||||
# Haml uses the same rules for parsing coding comments as Ruby.
|
||||
# This means that it can understand Emacs-style comments
|
||||
# (e.g. `-*- encoding: "utf-8" -*-`),
|
||||
# and also that it cannot understand non-ASCII-compatible encodings
|
||||
# such as `UTF-16` and `UTF-32`.
|
||||
#
|
||||
# @param str [String] The Haml template of which to check the encoding
|
||||
# @yield [msg] A block in which an encoding error can be raised.
|
||||
# Only yields if there is an encoding error
|
||||
# @yieldparam msg [String] The error message to be raised
|
||||
# @return [String] The original string encoded properly
|
||||
# @raise [ArgumentError] if the document declares an unknown encoding
|
||||
def check_haml_encoding(str, &block)
|
||||
return check_encoding(str, &block) if ruby1_8?
|
||||
|
||||
bom, encoding = parse_haml_magic_comment(str)
|
||||
if encoding; str.force_encoding(encoding)
|
||||
elsif bom; str.force_encoding("UTF-8")
|
||||
end
|
||||
|
||||
return check_encoding(str, &block)
|
||||
end
|
||||
|
||||
# Like {\#check\_encoding}, but also checks for a `@charset` declaration
|
||||
# at the beginning of the file and uses that encoding if it exists.
|
||||
#
|
||||
|
@ -695,5 +727,36 @@ METHOD
|
|||
return lcs_backtrace(c, x, y, i, j-1, &block) if c[i][j-1] > c[i-1][j]
|
||||
return lcs_backtrace(c, x, y, i-1, j, &block)
|
||||
end
|
||||
|
||||
# Parses a magic comment at the beginning of a Haml file.
# The parsing rules are basically the same as Ruby's.
#
# The template is scanned through a binary-encoded copy,
# so `str` itself is not modified.
# An Emacs-style `-*- coding: ... -*-` declaration is tried first;
# if there is none, a plain `coding:`/`coding=` declaration
# anywhere on the comment line is used.
#
# The result is meant to be destructured (`bom, encoding = ...`).
# When an encoding is declared a two-element array is returned;
# otherwise only the BOM match is returned, which destructures
# to the same `bom` and a `nil` encoding.
#
# @param str [String] The Haml template
# @return [(String or nil, String or nil)]
#   The UTF-8 BOM if the document begins with one (nil otherwise),
#   and the declared encoding of the document (or nil if none is declared)
def parse_haml_magic_comment(str)
  binary_copy = str.dup.force_encoding("BINARY")
  scanner = StringScanner.new(binary_copy)
  bom = scanner.scan(/\xEF\xBB\xBF/n)

  # Only a leading `-#` comment can carry an encoding declaration.
  if scanner.scan(/-\s*#\s*/n)
    emacs_coding = try_parse_haml_emacs_magic_comment(scanner)
    return bom, emacs_coding if emacs_coding

    return bom, scanner[1] if scanner.scan(/.*?coding[=:]\s*([\w-]+)/in)
  end

  bom
end
|
||||
|
||||
# Looks for an Emacs-style `-*- name: value -*-` declaration ahead of the
# scanner's current position and returns its value when the name contains
# `coding` (case-insensitively), e.g. `coding` or `encoding`.
# Surrounding double quotes are stripped from the value.
#
# The scanner's position is restored before returning,
# whether or not a declaration was found.
#
# @param scanner [StringScanner] A scanner positioned just after the `-#`
# @return [String, nil] The declared encoding name, or nil if there is none
def try_parse_haml_emacs_magic_comment(scanner)
  start = scanner.pos
  begin
    # The second pattern is from Ruby's parse.y
    if scanner.scan(/.*?-\*-\s*/n) &&
        scanner.scan(/([^\s'":;]+)\s*:\s*("(?:\\.|[^"])*"|[^"\s;]+?)[\s;]*-\*-/n)
      key = scanner[1]
      value = scanner[2]
      if key =~ /(en)?coding/in
        quoted = /^"(.*)"$/n.match(value)
        quoted ? quoted[1] : value
      end
    end
  ensure
    scanner.pos = start
  end
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1604,6 +1604,202 @@ HAML
|
|||
assert_equal(3, e.line)
|
||||
assert_equal('Invalid UTF-16LE character "\xFE"', e.message)
|
||||
end
|
||||
|
||||
def test_same_coding_comment_as_encoding
|
||||
assert_renders_encoded(<<HTML, <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# coding: utf-8
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_different_coding_comment_than_encoding
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# coding: ibm866
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_different_coding_than_system
|
||||
assert_renders_encoded(<<HTML.encode("IBM866"), <<HAML.encode("IBM866"))
|
||||
<p>тАЬ</p>
|
||||
HTML
|
||||
%p тАЬ
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_case_insensitive_coding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# CodINg: IbM866
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_whitespace_insensitive_coding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-#coding:ibm866
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_equals_coding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# CodINg= ibm866
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_prefixed_coding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# foo BAR FAOJcoding: ibm866
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_suffixed_coding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# coding: ibm866 ASFJ (&(&#!$
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_emacs_prefixed_coding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# -*- coding: ibm866
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_emacs_suffixed_coding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# coding: ibm866 -*- coding: blah
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_emacs_coding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# -*- coding: ibm866 -*-
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_emacs_encoding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# -*- encoding: ibm866 -*-
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_quoted_emacs_coding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# -*- coding: "ibm866" -*-
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_whitespace_insensitive_emacs_coding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-#-*-coding:ibm866-*-
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
# Whitespace-free Emacs-style declaration with a quoted encoding name.
# This definition used to be an exact duplicate of
# test_whitespace_insensitive_emacs_coding_comment, which only redefined
# that test; it now covers the quoted form under its own name.
def test_whitespace_insensitive_quoted_emacs_coding_comment
  assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
<p>bâr</p>
<p>föö</p>
HTML
-#-*-coding:"ibm866"-*-
%p bâr
%p föö
HAML
end
|
||||
|
||||
def test_one_of_several_emacs_comments
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# -*- foo: bar; coding: ibm866; baz: bang -*-
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_prefixed_emacs_coding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# foo bar coding: baz -*- coding: ibm866 -*-
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
def test_suffixed_emacs_coding_comment
|
||||
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||
<p>bâr</p>
|
||||
<p>föö</p>
|
||||
HTML
|
||||
-# -*- coding: ibm866 -*- foo bar coding: baz
|
||||
%p bâr
|
||||
%p föö
|
||||
HAML
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -1618,4 +1814,10 @@ HAML
|
|||
<p>föö</p>
|
||||
HTML
|
||||
end
|
||||
|
||||
# Renders the given Haml source and asserts that the output matches the
# expected HTML, checking the encoding first and then the content.
#
# html - the expected output string, tagged with the expected encoding
# haml - the Haml template source passed to render
def assert_renders_encoded(html, haml)
  rendered = render(haml)
  assert_equal(html.encoding, rendered.encoding)
  assert_equal(html, rendered)
end
|
||||
end
|
||||
|
|
Loading…
Add table
Reference in a new issue