mirror of
https://github.com/jashkenas/coffeescript.git
synced 2022-11-09 12:23:24 -05:00
many more comments, plus a fix for inner-assignment indentation
This commit is contained in:
parent
8511a33b1e
commit
68bc68c1ac
7 changed files with 180 additions and 181 deletions
2
TODO
2
TODO
|
@ -1,5 +1,7 @@
|
|||
TODO:
|
||||
|
||||
* Write some tests.
|
||||
|
||||
* Code Cleanup.
|
||||
|
||||
* Is it possible to close blocks (functions, ifs, trys) without an explicit
|
||||
|
|
|
@ -16,19 +16,14 @@ run_loop: =>
|
|||
dense_object_literal: {one: 1, two: 2, three: 3}
|
||||
|
||||
spaced_out_multiline_object: {
|
||||
|
||||
pi: 3.14159
|
||||
|
||||
list: [1, 2, 3, 4]
|
||||
|
||||
regex: /match[ing](every|thing|\/)/gi
|
||||
|
||||
three: new Idea()
|
||||
|
||||
inner_obj: {
|
||||
freedom: => _.freedom().
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Arrays:
|
||||
|
@ -38,6 +33,11 @@ exponents : [x => x., x => x * x., x => x * x * x.]
|
|||
|
||||
empty: []
|
||||
|
||||
multiline: [
|
||||
'line one'
|
||||
'line two'
|
||||
]
|
||||
|
||||
# Conditionals and ternaries.
|
||||
if submarine.shields_up
|
||||
full_speed_ahead()
|
||||
|
@ -64,7 +64,7 @@ good ||= evil
|
|||
wine &&= cheese
|
||||
|
||||
# Nested property access and calls.
|
||||
((moon.turn(360))).shapes[3].move({x: 45, y: 30}).position
|
||||
((moon.turn(360))).shapes[3].move({x: 45, y: 30}).position['top'].offset('x')
|
||||
|
||||
a: b: c: 5
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ rule
|
|||
| Terminator Expressions { result = val[1] }
|
||||
;
|
||||
|
||||
# All types of expressions in our language
|
||||
# All types of expressions in our language.
|
||||
Expression:
|
||||
Literal
|
||||
| Value
|
||||
|
@ -70,19 +70,19 @@ rule
|
|||
| Switch
|
||||
;
|
||||
|
||||
# All tokens that can terminate an expression
|
||||
# All tokens that can terminate an expression.
|
||||
Terminator:
|
||||
"\n"
|
||||
| ";"
|
||||
;
|
||||
|
||||
# All tokens that can serve to begin the second block
|
||||
# All tokens that can serve to begin the second block of a multi-part expression.
|
||||
Then:
|
||||
THEN
|
||||
| Terminator
|
||||
;
|
||||
|
||||
# All hard-coded values
|
||||
# All hard-coded values.
|
||||
Literal:
|
||||
NUMBER { result = LiteralNode.new(val[0]) }
|
||||
| STRING { result = LiteralNode.new(val[0]) }
|
||||
|
@ -95,7 +95,7 @@ rule
|
|||
| CONTINUE { result = LiteralNode.new(val[0]) }
|
||||
;
|
||||
|
||||
# Assign to a variable
|
||||
# Assignment to a variable.
|
||||
Assign:
|
||||
Value ":" Expression { result = AssignNode.new(val[0], val[2]) }
|
||||
;
|
||||
|
@ -105,7 +105,7 @@ rule
|
|||
IDENTIFIER ":" Expression { result = AssignNode.new(val[0], val[2], :object) }
|
||||
;
|
||||
|
||||
# A Return statement.
|
||||
# A return statement.
|
||||
Return:
|
||||
RETURN Expression { result = ReturnNode.new(val[1]) }
|
||||
;
|
||||
|
@ -150,24 +150,25 @@ rule
|
|||
| DELETE Expression { result = OpNode.new(val[0], val[1]) }
|
||||
;
|
||||
|
||||
|
||||
# Method definition
|
||||
# Function definition.
|
||||
Code:
|
||||
ParamList "=>" CodeBody "." { result = CodeNode.new(val[0], val[2]) }
|
||||
| "=>" CodeBody "." { result = CodeNode.new([], val[1]) }
|
||||
;
|
||||
|
||||
# The body of a function.
|
||||
CodeBody:
|
||||
/* nothing */ { result = Nodes.new([]) }
|
||||
| Expressions { result = val[0] }
|
||||
;
|
||||
|
||||
|
||||
# The parameters to a function definition.
|
||||
ParamList:
|
||||
PARAM { result = val }
|
||||
| ParamList "," PARAM { result = val[0] << val[2] }
|
||||
;
|
||||
|
||||
# Expressions that can be treated as values.
|
||||
Value:
|
||||
IDENTIFIER { result = ValueNode.new(val) }
|
||||
| Array { result = ValueNode.new(val) }
|
||||
|
@ -177,24 +178,29 @@ rule
|
|||
| Invocation Accessor { result = ValueNode.new(val[0], [val[1]]) }
|
||||
;
|
||||
|
||||
# Accessing into an object or array, through dot or index notation.
|
||||
Accessor:
|
||||
PROPERTY_ACCESS IDENTIFIER { result = AccessorNode.new(val[1]) }
|
||||
| Index { result = val[0] }
|
||||
| Slice { result = val[0] }
|
||||
;
|
||||
|
||||
# Indexing into an object or array.
|
||||
Index:
|
||||
"[" Expression "]" { result = IndexNode.new(val[1]) }
|
||||
;
|
||||
|
||||
# Array slice literal.
|
||||
Slice:
|
||||
"[" Expression "," Expression "]" { result = SliceNode.new(val[1], val[3]) }
|
||||
;
|
||||
|
||||
# An object literal.
|
||||
Object:
|
||||
"{" AssignList "}" { result = ObjectNode.new(val[1]) }
|
||||
;
|
||||
|
||||
# Assignment within an object literal (comma or newline separated).
|
||||
AssignList:
|
||||
/* nothing */ { result = []}
|
||||
| AssignObj { result = val }
|
||||
|
@ -202,27 +208,29 @@ rule
|
|||
| AssignList Terminator AssignObj { result = val[0] << val[2] }
|
||||
;
|
||||
|
||||
# A method call.
|
||||
# All flavors of function call (instantiation, super, and regular).
|
||||
Call:
|
||||
Invocation { result = val[0] }
|
||||
| NEW Invocation { result = val[1].new_instance }
|
||||
| Super { result = val[0] }
|
||||
;
|
||||
|
||||
# A generic function invocation.
|
||||
Invocation:
|
||||
Value "(" ArgList ")" { result = CallNode.new(val[0], val[2]) }
|
||||
;
|
||||
|
||||
# Calling super.
|
||||
Super:
|
||||
SUPER "(" ArgList ")" { result = CallNode.new(:super, val[2]) }
|
||||
;
|
||||
|
||||
# An Array.
|
||||
# The array literal.
|
||||
Array:
|
||||
"[" ArgList "]" { result = ArrayNode.new(val[1]) }
|
||||
;
|
||||
|
||||
# A list of arguments to a method call.
|
||||
# A list of arguments to a method call, or as the contents of an array.
|
||||
ArgList:
|
||||
/* nothing */ { result = [] }
|
||||
| Expression { result = val }
|
||||
|
@ -296,6 +304,9 @@ rule
|
|||
|
||||
end
|
||||
|
||||
---- header
|
||||
module CoffeeScript
|
||||
|
||||
---- inner
|
||||
def parse(code)
|
||||
# @yydebug = true
|
||||
|
@ -308,5 +319,8 @@ end
|
|||
end
|
||||
|
||||
def on_error(error_token_id, error_value, value_stack)
|
||||
raise CoffeeScript::ParseError.new(token_to_str(error_token_id), error_value, value_stack)
|
||||
end
|
||||
raise ParseError.new(token_to_str(error_token_id), error_value, value_stack)
|
||||
end
|
||||
|
||||
---- footer
|
||||
end
|
|
@ -1,157 +1,161 @@
|
|||
class Lexer
|
||||
module CoffeeScript
|
||||
|
||||
KEYWORDS = ["if", "else", "then", "unless",
|
||||
"true", "false", "null",
|
||||
"and", "or", "is", "aint", "not",
|
||||
"new", "return",
|
||||
"try", "catch", "finally", "throw",
|
||||
"break", "continue",
|
||||
"for", "in", "while",
|
||||
"switch", "case",
|
||||
"super",
|
||||
"delete"]
|
||||
class Lexer
|
||||
|
||||
IDENTIFIER = /\A([a-zA-Z$_]\w*)/
|
||||
NUMBER = /\A\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?))\b/i
|
||||
STRING = /\A("(.*?)[^\\]"|'(.*?)[^\\]')/m
|
||||
JS = /\A(`(.*?)`)/
|
||||
OPERATOR = /\A([+\*&|\/\-%=<>]+)/
|
||||
WHITESPACE = /\A([ \t\r]+)/
|
||||
NEWLINE = /\A([\r\n]+)/
|
||||
COMMENT = /\A(#[^\r\n]*)/
|
||||
CODE = /\A(=>)/
|
||||
REGEX = /\A(\/(.*?)[^\\]\/[imgy]{0,4})/
|
||||
KEYWORDS = ["if", "else", "then", "unless",
|
||||
"true", "false", "null",
|
||||
"and", "or", "is", "aint", "not",
|
||||
"new", "return",
|
||||
"try", "catch", "finally", "throw",
|
||||
"break", "continue",
|
||||
"for", "in", "while",
|
||||
"switch", "case",
|
||||
"super",
|
||||
"delete"]
|
||||
|
||||
JS_CLEANER = /(\A`|`\Z)/
|
||||
MULTILINER = /[\r\n]/
|
||||
IDENTIFIER = /\A([a-zA-Z$_]\w*)/
|
||||
NUMBER = /\A\b((0(x|X)[0-9a-fA-F]+)|([0-9]+(\.[0-9]+)?(e[+\-]?[0-9]+)?))\b/i
|
||||
STRING = /\A("(.*?)[^\\]"|'(.*?)[^\\]')/m
|
||||
JS = /\A(`(.*?)`)/
|
||||
OPERATOR = /\A([+\*&|\/\-%=<>]+)/
|
||||
WHITESPACE = /\A([ \t\r]+)/
|
||||
NEWLINE = /\A([\r\n]+)/
|
||||
COMMENT = /\A(#[^\r\n]*)/
|
||||
CODE = /\A(=>)/
|
||||
REGEX = /\A(\/(.*?)[^\\]\/[imgy]{0,4})/
|
||||
|
||||
EXP_START = ['{', '(', '[']
|
||||
EXP_END = ['}', ')', ']']
|
||||
JS_CLEANER = /(\A`|`\Z)/
|
||||
MULTILINER = /[\r\n]/
|
||||
|
||||
# This is how to implement a very simple scanner.
|
||||
# Scan one character at a time until you find something to parse.
|
||||
def tokenize(code)
|
||||
@code = code.chomp # Clean up code by removing extra line breaks
|
||||
@i = 0 # Current character position we're parsing
|
||||
@line = 1 # The current line.
|
||||
@tokens = [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
|
||||
while @i < @code.length
|
||||
@chunk = @code[@i..-1]
|
||||
extract_next_token
|
||||
EXP_START = ['{', '(', '[']
|
||||
EXP_END = ['}', ')', ']']
|
||||
|
||||
# This is how to implement a very simple scanner.
|
||||
# Scan one character at a time until you find something to parse.
|
||||
def tokenize(code)
|
||||
@code = code.chomp # Clean up code by removing extra line breaks
|
||||
@i = 0 # Current character position we're parsing
|
||||
@line = 1 # The current line.
|
||||
@tokens = [] # Collection of all parsed tokens in the form [:TOKEN_TYPE, value]
|
||||
while @i < @code.length
|
||||
@chunk = @code[@i..-1]
|
||||
extract_next_token
|
||||
end
|
||||
@tokens
|
||||
end
|
||||
@tokens
|
||||
end
|
||||
|
||||
def extract_next_token
|
||||
return if identifier_token
|
||||
return if number_token
|
||||
return if string_token
|
||||
return if js_token
|
||||
return if regex_token
|
||||
return if remove_comment
|
||||
return if whitespace_token
|
||||
return literal_token
|
||||
end
|
||||
|
||||
# Matching if, print, method names, etc.
|
||||
def identifier_token
|
||||
return false unless identifier = @chunk[IDENTIFIER, 1]
|
||||
# Keywords are special identifiers tagged with their own name, 'if' will result
|
||||
# in an [:IF, "if"] token
|
||||
tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
|
||||
@tokens[-1][0] = :PROPERTY_ACCESS if tag == :IDENTIFIER && last_value == '.'
|
||||
token(tag, identifier)
|
||||
@i += identifier.length
|
||||
end
|
||||
|
||||
def number_token
|
||||
return false unless number = @chunk[NUMBER, 1]
|
||||
token(:NUMBER, number)
|
||||
@i += number.length
|
||||
end
|
||||
|
||||
def string_token
|
||||
return false unless string = @chunk[STRING, 1]
|
||||
escaped = string.gsub(MULTILINER) do |match|
|
||||
@line += 1
|
||||
"\\\n"
|
||||
def extract_next_token
|
||||
return if identifier_token
|
||||
return if number_token
|
||||
return if string_token
|
||||
return if js_token
|
||||
return if regex_token
|
||||
return if remove_comment
|
||||
return if whitespace_token
|
||||
return literal_token
|
||||
end
|
||||
token(:STRING, escaped)
|
||||
@i += string.length
|
||||
end
|
||||
|
||||
def js_token
|
||||
return false unless script = @chunk[JS, 1]
|
||||
token(:JS, script.gsub(JS_CLEANER, ''))
|
||||
@i += script.length
|
||||
end
|
||||
|
||||
def regex_token
|
||||
return false unless regex = @chunk[REGEX, 1]
|
||||
token(:REGEX, regex)
|
||||
@i += regex.length
|
||||
end
|
||||
|
||||
def remove_comment
|
||||
return false unless comment = @chunk[COMMENT, 1]
|
||||
@i += comment.length
|
||||
end
|
||||
|
||||
# Ignore whitespace
|
||||
def whitespace_token
|
||||
return false unless whitespace = @chunk[WHITESPACE, 1]
|
||||
@i += whitespace.length
|
||||
end
|
||||
|
||||
# We treat all other single characters as a token. Eg.: ( ) , . !
|
||||
# Multi-character operators are also literal tokens, so that Racc can assign
|
||||
# the proper order of operations. Multiple newlines get merged.
|
||||
def literal_token
|
||||
value = @chunk[NEWLINE, 1]
|
||||
if value
|
||||
@line += value.length
|
||||
token("\n", "\n") unless last_value == "\n"
|
||||
return @i += value.length
|
||||
# Matching if, print, method names, etc.
|
||||
def identifier_token
|
||||
return false unless identifier = @chunk[IDENTIFIER, 1]
|
||||
# Keywords are special identifiers tagged with their own name, 'if' will result
|
||||
# in an [:IF, "if"] token
|
||||
tag = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
|
||||
@tokens[-1][0] = :PROPERTY_ACCESS if tag == :IDENTIFIER && last_value == '.'
|
||||
token(tag, identifier)
|
||||
@i += identifier.length
|
||||
end
|
||||
value = @chunk[OPERATOR, 1]
|
||||
tag_parameters if value && value.match(CODE)
|
||||
value ||= @chunk[0,1]
|
||||
skip_following_newlines if EXP_START.include?(value)
|
||||
remove_leading_newlines if EXP_END.include?(value)
|
||||
token(value, value)
|
||||
@i += value.length
|
||||
end
|
||||
|
||||
def token(tag, value)
|
||||
@tokens << [tag, Value.new(value, @line)]
|
||||
end
|
||||
|
||||
def last_value
|
||||
@tokens.last && @tokens.last[1]
|
||||
end
|
||||
|
||||
# The main source of ambiguity in our grammar was Parameter lists (as opposed
|
||||
# to argument lists in method calls). Tag parameter identifiers to avoid this.
|
||||
def tag_parameters
|
||||
index = 0
|
||||
loop do
|
||||
tok = @tokens[index -= 1]
|
||||
next if tok[0] == ','
|
||||
return if tok[0] != :IDENTIFIER
|
||||
tok[0] = :PARAM
|
||||
def number_token
|
||||
return false unless number = @chunk[NUMBER, 1]
|
||||
token(:NUMBER, number)
|
||||
@i += number.length
|
||||
end
|
||||
end
|
||||
|
||||
def skip_following_newlines
|
||||
newlines = @code[(@i+1)..-1][NEWLINE, 1]
|
||||
if newlines
|
||||
@line += newlines.length
|
||||
@i += newlines.length
|
||||
def string_token
|
||||
return false unless string = @chunk[STRING, 1]
|
||||
escaped = string.gsub(MULTILINER) do |match|
|
||||
@line += 1
|
||||
"\\\n"
|
||||
end
|
||||
token(:STRING, escaped)
|
||||
@i += string.length
|
||||
end
|
||||
|
||||
def js_token
|
||||
return false unless script = @chunk[JS, 1]
|
||||
token(:JS, script.gsub(JS_CLEANER, ''))
|
||||
@i += script.length
|
||||
end
|
||||
|
||||
def regex_token
|
||||
return false unless regex = @chunk[REGEX, 1]
|
||||
token(:REGEX, regex)
|
||||
@i += regex.length
|
||||
end
|
||||
|
||||
def remove_comment
|
||||
return false unless comment = @chunk[COMMENT, 1]
|
||||
@i += comment.length
|
||||
end
|
||||
|
||||
# Ignore whitespace
|
||||
def whitespace_token
|
||||
return false unless whitespace = @chunk[WHITESPACE, 1]
|
||||
@i += whitespace.length
|
||||
end
|
||||
|
||||
# We treat all other single characters as a token. Eg.: ( ) , . !
|
||||
# Multi-character operators are also literal tokens, so that Racc can assign
|
||||
# the proper order of operations. Multiple newlines get merged.
|
||||
def literal_token
|
||||
value = @chunk[NEWLINE, 1]
|
||||
if value
|
||||
@line += value.length
|
||||
token("\n", "\n") unless last_value == "\n"
|
||||
return @i += value.length
|
||||
end
|
||||
value = @chunk[OPERATOR, 1]
|
||||
tag_parameters if value && value.match(CODE)
|
||||
value ||= @chunk[0,1]
|
||||
skip_following_newlines if EXP_START.include?(value)
|
||||
remove_leading_newlines if EXP_END.include?(value)
|
||||
token(value, value)
|
||||
@i += value.length
|
||||
end
|
||||
|
||||
def token(tag, value)
|
||||
@tokens << [tag, Value.new(value, @line)]
|
||||
end
|
||||
|
||||
def last_value
|
||||
@tokens.last && @tokens.last[1]
|
||||
end
|
||||
|
||||
# The main source of ambiguity in our grammar was Parameter lists (as opposed
|
||||
# to argument lists in method calls). Tag parameter identifiers to avoid this.
|
||||
def tag_parameters
|
||||
index = 0
|
||||
loop do
|
||||
tok = @tokens[index -= 1]
|
||||
next if tok[0] == ','
|
||||
return if tok[0] != :IDENTIFIER
|
||||
tok[0] = :PARAM
|
||||
end
|
||||
end
|
||||
|
||||
def skip_following_newlines
|
||||
newlines = @code[(@i+1)..-1][NEWLINE, 1]
|
||||
if newlines
|
||||
@line += newlines.length
|
||||
@i += newlines.length
|
||||
end
|
||||
end
|
||||
|
||||
def remove_leading_newlines
|
||||
@tokens.pop if last_value == "\n"
|
||||
end
|
||||
end
|
||||
|
||||
def remove_leading_newlines
|
||||
@tokens.pop if last_value == "\n"
|
||||
end
|
||||
|
||||
end
|
|
@ -205,7 +205,7 @@ class AssignNode < Node
|
|||
name = @variable.compile(indent, scope) if @variable.respond_to?(:compile)
|
||||
last = @variable.respond_to?(:last) ? @variable.last : name
|
||||
opts = opts.merge({:assign => name, :last_assign => last})
|
||||
value = @value.compile(indent, scope, opts)
|
||||
value = @value.compile(indent + TAB, scope, opts)
|
||||
return "#{@variable}: #{value}" if @context == :object
|
||||
return "#{name} = #{value}" if @variable.properties?
|
||||
defined = scope.find(name)
|
||||
|
|
|
@ -1,2 +0,0 @@
|
|||
require "lexer"
|
||||
p Lexer.new.tokenize(File.read('code.cs'))
|
|
@ -1,19 +0,0 @@
|
|||
# Recompile the Parser.
|
||||
# With debugging and verbose: -v -g
|
||||
`racc -v -o parser.rb grammar.y`
|
||||
|
||||
# Parse and print the compiled CoffeeScript source.
|
||||
require "parser.rb"
|
||||
js = Parser.new.parse(File.read('code.cs')).compile
|
||||
puts "\n\n"
|
||||
puts js
|
||||
|
||||
# Pipe compiled JS through JSLint.
|
||||
puts "\n\n"
|
||||
require 'open3'
|
||||
stdin, stdout, stderr = Open3.popen3('/Users/jashkenas/Library/Application\ Support/TextMate/Bundles/JavaScript\ Tools.tmbundle/Support/bin/jsl -nologo -stdin')
|
||||
stdin.write(js)
|
||||
stdin.close
|
||||
puts stdout.read
|
||||
stdout.close
|
||||
stderr.close
|
Loading…
Reference in a new issue