mirror of
https://github.com/haml/haml.git
synced 2022-11-09 12:33:31 -05:00
[Haml] Add -# encoding: support.
This commit is contained in:
parent
919fcf0c8d
commit
ebaf1baa42
6 changed files with 310 additions and 5 deletions
|
@ -3,6 +3,20 @@
|
||||||
* Table of contents
|
* Table of contents
|
||||||
{:toc}
|
{:toc}
|
||||||
|
|
||||||
|
## 3.0.7 (Unreleased)
|
||||||
|
|
||||||
|
### Encoding Support
|
||||||
|
|
||||||
|
Haml 3.0.7 adds support for Ruby-style `-# coding:` comments
|
||||||
|
for declaring the encoding of a template.
|
||||||
|
For details see {file:HAML_REFERENCE.md#encodings the reference}.
|
||||||
|
|
||||||
|
This also slightly changes the behavior of Haml when the
|
||||||
|
{file:HAML_REFERENCE.md#encoding-option `:encoding` option} is not set.
|
||||||
|
Rather than defaulting to `"utf-8"`,
|
||||||
|
it defaults to the encoding of the source document,
|
||||||
|
and only falls back to `"utf-8"` if this encoding is `"us-ascii"`.
|
||||||
|
|
||||||
## 3.0.6
|
## 3.0.6
|
||||||
|
|
||||||
[Tagged on GitHub](http://github.com/nex3/haml/commit/3.0.6).
|
[Tagged on GitHub](http://github.com/nex3/haml/commit/3.0.6).
|
||||||
|
|
|
@ -197,7 +197,9 @@ Available options are:
|
||||||
Note that Haml **does not** automatically re-encode Ruby values;
|
Note that Haml **does not** automatically re-encode Ruby values;
|
||||||
any strings coming from outside the application should be converted
|
any strings coming from outside the application should be converted
|
||||||
before being passed into the Haml template.
|
before being passed into the Haml template.
|
||||||
Defaults to `Encoding.default_internal` or, if that's not set, `"utf-8"`.
|
Defaults to `Encoding.default_internal`; if that's not set,
|
||||||
|
defaults to the encoding of the Haml template;
|
||||||
|
if that's `us-ascii`, defaults to `"utf-8"`.
|
||||||
<br/><br/> <!-- There's no better way to do a paragraph break in a dl in Maruku -->
|
<br/><br/> <!-- There's no better way to do a paragraph break in a dl in Maruku -->
|
||||||
Many Ruby database drivers are not yet Ruby 1.9 compatible;
|
Many Ruby database drivers are not yet Ruby 1.9 compatible;
|
||||||
in particular, they return strings marked as ASCII-encoded
|
in particular, they return strings marked as ASCII-encoded
|
||||||
|
@ -207,6 +209,25 @@ Available options are:
|
||||||
set `:encoding` to `"ascii-8bit"`, or try to get the authors of the database drivers
|
set `:encoding` to `"ascii-8bit"`, or try to get the authors of the database drivers
|
||||||
to make them Ruby 1.9 compatible.
|
to make them Ruby 1.9 compatible.
|
||||||
|
|
||||||
|
### Encodings
|
||||||
|
|
||||||
|
When using Ruby 1.9 or later,
|
||||||
|
Haml supports the same sorts of encoding-declaration comments that Ruby does.
|
||||||
|
Although both Ruby and Haml support several different styles,
|
||||||
|
the easiest is just to add `-# coding: encoding-name`
|
||||||
|
at the beginning of the Haml template
|
||||||
|
(it must come before all other lines).
|
||||||
|
This will tell Haml that the template is encoded using the named encoding.
|
||||||
|
|
||||||
|
By default, the HTML generated by Haml has the same encoding as the Haml template.
|
||||||
|
However, if `Encoding.default_internal` is set, Haml will attempt to use that instead.
|
||||||
|
In addition, the [`:encoding` option](#encoding-option) can be used
|
||||||
|
to specify an output encoding manually.
|
||||||
|
|
||||||
|
Note that, like Ruby, Haml does not support templates encoded in UTF-16 or UTF-32,
|
||||||
|
since these encodings are not compatible with ASCII.
|
||||||
|
It is possible to use these as the output encoding, though.
|
||||||
|
|
||||||
## Plain Text
|
## Plain Text
|
||||||
|
|
||||||
A substantial portion of any HTML document is its content,
|
A substantial portion of any HTML document is its content,
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
|
|
||||||
### Encoding Support
|
### Encoding Support
|
||||||
|
|
||||||
Add support for `@charset` for declaring the encoding of a stylesheet.
|
Sass 3.0.7 adds support for `@charset` for declaring the encoding of a stylesheet.
|
||||||
For details see {file:SASS_REFERENCE.md#encodings the reference}.
|
For details see {file:SASS_REFERENCE.md#encodings the reference}.
|
||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
|
|
@ -85,8 +85,15 @@ module Haml
|
||||||
:format => :xhtml,
|
:format => :xhtml,
|
||||||
:escape_html => false,
|
:escape_html => false,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template = check_haml_encoding(template) do |msg, line|
|
||||||
|
raise Haml::Error.new(msg, line)
|
||||||
|
end
|
||||||
|
|
||||||
unless ruby1_8?
|
unless ruby1_8?
|
||||||
@options[:encoding] = Encoding.default_internal || "utf-8"
|
@options[:encoding] = Encoding.default_internal || template.encoding
|
||||||
|
@options[:encoding] = "utf-8" if @options[:encoding].name == "US-ASCII"
|
||||||
end
|
end
|
||||||
@options.merge! options.reject {|k, v| v.nil?}
|
@options.merge! options.reject {|k, v| v.nil?}
|
||||||
@index = 0
|
@index = 0
|
||||||
|
@ -99,8 +106,6 @@ module Haml
|
||||||
@options[:encoding] = @options[:encoding].name
|
@options[:encoding] = @options[:encoding].name
|
||||||
end
|
end
|
||||||
|
|
||||||
template = check_encoding(template) {|msg, line| raise Haml::Error.new(msg, line)}
|
|
||||||
|
|
||||||
# :eod is a special end-of-document marker
|
# :eod is a special end-of-document marker
|
||||||
@template = (template.rstrip).split(/\r\n|\r|\n/) + [:eod, :eod]
|
@template = (template.rstrip).split(/\r\n|\r|\n/) + [:eod, :eod]
|
||||||
@template_index = 0
|
@template_index = 0
|
||||||
|
|
|
@ -2,6 +2,7 @@ require 'erb'
|
||||||
require 'set'
|
require 'set'
|
||||||
require 'enumerator'
|
require 'enumerator'
|
||||||
require 'stringio'
|
require 'stringio'
|
||||||
|
require 'strscan'
|
||||||
require 'haml/root'
|
require 'haml/root'
|
||||||
require 'haml/util/subset_map'
|
require 'haml/util/subset_map'
|
||||||
|
|
||||||
|
@ -434,6 +435,37 @@ MSG
|
||||||
return str
|
return str
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Like {\#check\_encoding}, but also checks for a Ruby-style `-# coding:` comment
|
||||||
|
# at the beginning of the template and uses that encoding if it exists.
|
||||||
|
#
|
||||||
|
# The Haml encoding rules are simple.
|
||||||
|
# If a `-# coding:` comment exists,
|
||||||
|
# we assume that that's the original encoding of the document.
|
||||||
|
# Otherwise, we use whatever encoding Ruby has.
|
||||||
|
#
|
||||||
|
# Haml uses the same rules for parsing coding comments as Ruby.
|
||||||
|
# This means that it can understand Emacs-style comments
|
||||||
|
# (e.g. `-*- encoding: "utf-8" -*-`),
|
||||||
|
# and also that it cannot understand non-ASCII-compatible encodings
|
||||||
|
# such as `UTF-16` and `UTF-32`.
|
||||||
|
#
|
||||||
|
# @param str [String] The Haml template of which to check the encoding
|
||||||
|
# @yield [msg] A block in which an encoding error can be raised.
|
||||||
|
# Only yields if there is an encoding error
|
||||||
|
# @yieldparam msg [String] The error message to be raised
|
||||||
|
# @return [String] The original string encoded properly
|
||||||
|
# @raise [ArgumentError] if the document declares an unknown encoding
|
||||||
|
def check_haml_encoding(str, &block)
|
||||||
|
return check_encoding(str, &block) if ruby1_8?
|
||||||
|
|
||||||
|
bom, encoding = parse_haml_magic_comment(str)
|
||||||
|
if encoding; str.force_encoding(encoding)
|
||||||
|
elsif bom; str.force_encoding("UTF-8")
|
||||||
|
end
|
||||||
|
|
||||||
|
return check_encoding(str, &block)
|
||||||
|
end
|
||||||
|
|
||||||
# Like {\#check\_encoding}, but also checks for a `@charset` declaration
|
# Like {\#check\_encoding}, but also checks for a `@charset` declaration
|
||||||
# at the beginning of the file and uses that encoding if it exists.
|
# at the beginning of the file and uses that encoding if it exists.
|
||||||
#
|
#
|
||||||
|
@ -695,5 +727,36 @@ METHOD
|
||||||
return lcs_backtrace(c, x, y, i, j-1, &block) if c[i][j-1] > c[i-1][j]
|
return lcs_backtrace(c, x, y, i, j-1, &block) if c[i][j-1] > c[i-1][j]
|
||||||
return lcs_backtrace(c, x, y, i-1, j, &block)
|
return lcs_backtrace(c, x, y, i-1, j, &block)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Parses a magic comment at the beginning of a Haml file.
|
||||||
|
# The parsing rules are basically the same as Ruby's.
|
||||||
|
#
|
||||||
|
# @return [(Boolean, String or nil)]
|
||||||
|
# Whether the document begins with a UTF-8 BOM,
|
||||||
|
# and the declared encoding of the document (or nil if none is declared)
|
||||||
|
def parse_haml_magic_comment(str)
|
||||||
|
scanner = StringScanner.new(str.dup.force_encoding("BINARY"))
|
||||||
|
bom = scanner.scan(/\xEF\xBB\xBF/n)
|
||||||
|
return bom unless scanner.scan(/-\s*#\s*/n)
|
||||||
|
if coding = try_parse_haml_emacs_magic_comment(scanner)
|
||||||
|
return bom, coding
|
||||||
|
end
|
||||||
|
|
||||||
|
return bom unless scanner.scan(/.*?coding[=:]\s*([\w-]+)/in)
|
||||||
|
return bom, scanner[1]
|
||||||
|
end
|
||||||
|
|
||||||
|
def try_parse_haml_emacs_magic_comment(scanner)
|
||||||
|
pos = scanner.pos
|
||||||
|
return unless scanner.scan(/.*?-\*-\s*/n)
|
||||||
|
# From Ruby's parse.y
|
||||||
|
return unless scanner.scan(/([^\s'":;]+)\s*:\s*("(?:\\.|[^"])*"|[^"\s;]+?)[\s;]*-\*-/n)
|
||||||
|
name, val = scanner[1], scanner[2]
|
||||||
|
return unless name =~ /(en)?coding/in
|
||||||
|
val = $1 if val =~ /^"(.*)"$/n
|
||||||
|
return val
|
||||||
|
ensure
|
||||||
|
scanner.pos = pos
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1604,6 +1604,202 @@ HAML
|
||||||
assert_equal(3, e.line)
|
assert_equal(3, e.line)
|
||||||
assert_equal('Invalid UTF-16LE character "\xFE"', e.message)
|
assert_equal('Invalid UTF-16LE character "\xFE"', e.message)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_same_coding_comment_as_encoding
|
||||||
|
assert_renders_encoded(<<HTML, <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# coding: utf-8
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_different_coding_comment_than_encoding
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# coding: ibm866
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_different_coding_than_system
|
||||||
|
assert_renders_encoded(<<HTML.encode("IBM866"), <<HAML.encode("IBM866"))
|
||||||
|
<p>тАЬ</p>
|
||||||
|
HTML
|
||||||
|
%p тАЬ
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_case_insensitive_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# CodINg: IbM866
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_whitespace_insensitive_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-#coding:ibm866
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_equals_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# CodINg= ibm866
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_prefixed_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# foo BAR FAOJcoding: ibm866
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_suffixed_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# coding: ibm866 ASFJ (&(&#!$
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_emacs_prefixed_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# -*- coding: ibm866
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_emacs_suffixed_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# coding: ibm866 -*- coding: blah
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_emacs_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# -*- coding: ibm866 -*-
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_emacs_encoding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# -*- encoding: ibm866 -*-
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_quoted_emacs_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# -*- coding: "ibm866" -*-
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_whitespace_insensitive_emacs_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-#-*-coding:ibm866-*-
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_whitespace_insensitive_emacs_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-#-*-coding:ibm866-*-
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_one_of_several_emacs_comments
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# -*- foo: bar; coding: ibm866; baz: bang -*-
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_prefixed_emacs_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# foo bar coding: baz -*- coding: ibm866 -*-
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_suffixed_emacs_coding_comment
|
||||||
|
assert_renders_encoded(<<HTML.force_encoding("IBM866"), <<HAML)
|
||||||
|
<p>bâr</p>
|
||||||
|
<p>föö</p>
|
||||||
|
HTML
|
||||||
|
-# -*- coding: ibm866 -*- foo bar coding: baz
|
||||||
|
%p bâr
|
||||||
|
%p föö
|
||||||
|
HAML
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
@ -1618,4 +1814,10 @@ HAML
|
||||||
<p>föö</p>
|
<p>föö</p>
|
||||||
HTML
|
HTML
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def assert_renders_encoded(html, haml)
|
||||||
|
result = render(haml)
|
||||||
|
assert_equal html.encoding, result.encoding
|
||||||
|
assert_equal html, result
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Reference in a new issue