# ruby--ruby/test/racc/assets/mediacloth.y
#
# 600 lines
# 16 KiB
# Plaintext

# Copyright (c) 2006 Pluron Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The parser for the MediaWiki language.
#
# Usage together with a lexer:
#   inputFile = File.new("data/input1", "r")
#   input = inputFile.read
#   parser = MediaWikiParser.new
#   parser.lexer = MediaWikiLexer.new
#   parser.parse(input)
class MediaWikiParser
token TEXT BOLD_START BOLD_END ITALIC_START ITALIC_END LINK_START LINK_END LINKSEP
INTLINK_START INTLINK_END INTLINKSEP RESOURCESEP CHAR_ENT
PRE_START PRE_END PREINDENT_START PREINDENT_END
SECTION_START SECTION_END HLINE SIGNATURE_NAME SIGNATURE_DATE SIGNATURE_FULL
PARA_START PARA_END UL_START UL_END OL_START OL_END LI_START LI_END
DL_START DL_END DT_START DT_END DD_START DD_END TAG_START TAG_END ATTR_NAME ATTR_VALUE
TABLE_START TABLE_END ROW_START ROW_END HEAD_START HEAD_END CELL_START CELL_END
KEYWORD TEMPLATE_START TEMPLATE_END CATEGORY PASTE_START PASTE_END
rule
# Start symbol. Builds a WikiAST root spanning offset 0 to @wiki_ast_length,
# records it in @nodes and appends the parsed contents as its children.
wiki:
    repeated_contents
      {
        root = WikiAST.new(0, @wiki_ast_length)
        @nodes.push(root)
        root.children += val[0]
      }
  ;
# A single wiki construct. Nodes built by sub-rules are passed through
# unchanged; token-delimited constructs are wrapped in the matching AST node,
# created with the current @ast_index and @ast_length.
contents:
    text
      {
        result = val.first
      }
  | bulleted_list
      {
        result = val.first
      }
  | numbered_list
      {
        result = val.first
      }
  | dictionary_list
      {
        # dictionary_list yields a plain array of items, so wrap it here.
        result = ListAST.new(@ast_index, @ast_length)
        result.list_type = :Dictionary
        result.children = val.first
      }
  | preformatted
      {
        result = val.first
      }
  | section
      {
        result = val.first
      }
  | tag
      {
        result = val.first
      }
  | template
      {
        result = val.first
      }
  | KEYWORD
      {
        keyword = KeywordAST.new(@ast_index, @ast_length)
        keyword.text = val.first
        result = keyword
      }
  | PARA_START para_contents PARA_END
      {
        paragraph = ParagraphAST.new(@ast_index, @ast_length)
        paragraph.children = val[1]
        result = paragraph
      }
  | LINK_START link_contents LINK_END
      {
        # link_contents is an array: the URL first, then any caption nodes.
        parts = val[1]
        link = LinkAST.new(@ast_index, @ast_length)
        link.link_type = val[0]
        link.url = parts[0]
        link.children += parts[1..-1] if parts.length > 1
        result = link
      }
  | PASTE_START para_contents PASTE_END
      {
        paste = PasteAST.new(@ast_index, @ast_length)
        paste.children = val[1]
        result = paste
      }
  | INTLINK_START TEXT RESOURCESEP TEXT reslink_repeated_contents INTLINK_END
      {
        items = val[4]
        link = ResourceLinkAST.new(@ast_index, @ast_length)
        link.prefix = val[1]
        link.locator = val[3]
        link.children = items unless items.nil? || items.empty?
        result = link
      }
  | INTLINK_START TEXT intlink_repeated_contents INTLINK_END
      {
        caption = val[2]
        link = InternalLinkAST.new(@ast_index, @ast_length)
        link.locator = val[1]
        link.children = caption unless caption.nil? || caption.empty?
        result = link
      }
  | INTLINK_START CATEGORY TEXT cat_sort_contents INTLINK_END
      {
        category = CategoryAST.new(@ast_index, @ast_length)
        category.locator = val[2]
        category.sort_as = val[3]
        result = category
      }
  | INTLINK_START RESOURCESEP CATEGORY TEXT intlink_repeated_contents INTLINK_END
      {
        caption = val[4]
        link = CategoryLinkAST.new(@ast_index, @ast_length)
        link.locator = val[3]
        link.children = caption unless caption.nil? || caption.empty?
        result = link
      }
  | table
  ;
# Body of a paragraph or paste block: nil when empty, otherwise the array
# produced by repeated_contents.
para_contents:
      {
        result = nil
      }
  | repeated_contents
      {
        result = val.first
      }
  ;
# An XHTML-style element, with or without body contents. Raises
# Racc::ParseError when the TAG_END value differs from the TAG_START value.
tag:
    TAG_START tag_attributes TAG_END
      {
        unless val[0] == val[2]
          raise Racc::ParseError.new("XHTML end tag #{val[2]} does not match start tag #{val[0]}")
        end
        element = ElementAST.new(@ast_index, @ast_length)
        element.name = val[0]
        element.attributes = val[1]
        result = element
      }
  | TAG_START tag_attributes repeated_contents TAG_END
      {
        unless val[0] == val[3]
          raise Racc::ParseError.new("XHTML end tag #{val[3]} does not match start tag #{val[0]}")
        end
        element = ElementAST.new(@ast_index, @ast_length)
        element.name = val[0]
        element.attributes = val[1]
        element.children += val[2]
        result = element
      }
  ;
# Attributes of an XHTML-style tag, collected into a Hash keyed by attribute
# name. The empty alternative yields nil; a bare ATTR_NAME maps to true and an
# ATTR_NAME ATTR_VALUE pair maps to the value. The rule is right-recursive, so
# the Hash is created by the last attribute and filled in on the way back.
tag_attributes:
      {
        result = nil
      }
  | ATTR_NAME tag_attributes
      {
        # This alternative has only two symbols, so the remaining attributes
        # are val[1]. Reading val[2] always gave nil and discarded every
        # attribute that followed a value-less one.
        attr_map = val[1] || {}
        attr_map[val[0]] = true
        result = attr_map
      }
  | ATTR_NAME ATTR_VALUE tag_attributes
      {
        attr_map = val[2] || {}
        attr_map[val[0]] = val[1]
        result = attr_map
      }
  ;
# Contents of an external link, as an array whose first element is the TEXT
# token value, followed by any caption nodes after the LINKSEP.
link_contents:
    TEXT
      {
        result = val
      }
  | TEXT LINKSEP link_repeated_contents
      {
        result = [val[0]] + val[2]
      }
  ;
# Caption nodes of an external link. Segments separated by LINKSEP are
# concatenated into a single array.
link_repeated_contents:
    repeated_contents
      {
        result = val.first
      }
  | repeated_contents LINKSEP link_repeated_contents
      {
        head, tail = val[0], val[2]
        result = tail ? head + tail : head
      }
  ;
# Optional caption of an internal link: nil when absent, otherwise the
# contents that follow the INTLINKSEP.
intlink_repeated_contents:
      {
        result = nil
      }
  | INTLINKSEP repeated_contents
      {
        result = val.last
      }
  ;
# Optional sort key of a category link: nil when absent, otherwise the TEXT
# token value that follows the INTLINKSEP.
cat_sort_contents:
      {
        result = nil
      }
  | INTLINKSEP TEXT
      {
        result = val.last
      }
  ;
# Parameters of a resource link. Each INTLINKSEP-separated segment with
# contents becomes an InternalLinkItemAST; a separator with no contents adds
# nothing. Yields nil when there are no parameters at all.
reslink_repeated_contents:
      {
        result = nil
      }
  | INTLINKSEP reslink_repeated_contents
      {
        result = val.last
      }
  | INTLINKSEP repeated_contents reslink_repeated_contents
      {
        item = InternalLinkItemAST.new(@ast_index, @ast_length)
        item.children = val[1]
        rest = val[2]
        result = rest ? [item] + rest : [item]
      }
  ;
# One or more contents, gathered left to right into a fresh array.
repeated_contents: contents
      {
        result = [val[0]]
      }
  | repeated_contents contents
      {
        result = val[0] + [val[1]]
      }
  ;
# Plain or formatted text. An element arrives as a [formatting, contents]
# pair and is wrapped in a TextAST; a formatted_element is passed through.
text: element
      {
        formatting, contents = val[0][0], val[0][1]
        node = TextAST.new(@ast_index, @ast_length)
        node.formatting = formatting
        node.contents = contents
        result = node
      }
  | formatted_element
      {
        result = val.first
      }
  ;
# A table, optionally with a TEXT options token right after TABLE_START.
# Children are only assigned when there is at least one row.
table:
    TABLE_START table_contents TABLE_END
      {
        rows = val[1]
        node = TableAST.new(@ast_index, @ast_length)
        node.children = rows unless rows.nil? || rows.empty?
        result = node
      }
  | TABLE_START TEXT table_contents TABLE_END
      {
        rows = val[2]
        node = TableAST.new(@ast_index, @ast_length)
        node.options = val[1]
        node.children = rows unless rows.nil? || rows.empty?
        result = node
      }
# Rows of a table as an array of TableRowAST nodes (nil when there are none).
# A row may carry a TEXT options token right after ROW_START.
table_contents:
      {
        result = nil
      }
  | ROW_START row_contents ROW_END table_contents
      {
        cells, more_rows = val[1], val[3]
        row = TableRowAST.new(@ast_index, @ast_length)
        row.children = cells unless cells.nil? || cells.empty?
        result = [row]
        result += more_rows unless more_rows.nil? || more_rows.empty?
      }
  | ROW_START TEXT row_contents ROW_END table_contents
      {
        cells, more_rows = val[2], val[4]
        row = TableRowAST.new(@ast_index, @ast_length)
        row.children = cells unless cells.nil? || cells.empty?
        row.options = val[1]
        result = [row]
        result += more_rows unless more_rows.nil? || more_rows.empty?
      }
# Cells of a table row as an array of TableCellAST nodes (nil when empty).
# Head cells get type :head and body cells get type :body.
row_contents:
      {
        result = nil
      }
  | HEAD_START HEAD_END row_contents
      {
        following = val[2]
        cell = TableCellAST.new(@ast_index, @ast_length)
        cell.type = :head
        result = [cell]
        result += following unless following.nil? || following.empty?
      }
  | HEAD_START repeated_contents HEAD_END row_contents
      {
        body, following = val[1], val[3]
        cell = TableCellAST.new(@ast_index, @ast_length)
        cell.children = body unless body.nil? || body.empty?
        cell.type = :head
        result = [cell]
        result += following unless following.nil? || following.empty?
      }
  | CELL_START CELL_END row_contents
      {
        following = val[2]
        cell = TableCellAST.new(@ast_index, @ast_length)
        cell.type = :body
        result = [cell]
        result += following unless following.nil? || following.empty?
      }
  | CELL_START repeated_contents CELL_END row_contents
      {
        # A CELL_END whose value is the string attributes marks this segment
        # as attributes for the next cell rather than a cell of its own.
        body, following = val[1], val[3]
        attributes_only = (val[2] == 'attributes')
        if attributes_only
          result = []
        else
          cell = TableCellAST.new(@ast_index, @ast_length)
          cell.children = body unless body.nil? || body.empty?
          cell.type = :body
          result = [cell]
        end
        result += following unless following.nil? || following.empty?
        if attributes_only && following && following.first.class == TableCellAST
          following.first.attributes = body
        end
      }
# A leaf token, reported as a [formatting symbol, token value] pair.
element:
    TEXT
      { result = [:None, val[0]] }
  | HLINE
      { result = [:HLine, val[0]] }
  | CHAR_ENT
      { result = [:CharacterEntity, val[0]] }
  | SIGNATURE_DATE
      { result = [:SignatureDate, val[0]] }
  | SIGNATURE_NAME
      { result = [:SignatureName, val[0]] }
  | SIGNATURE_FULL
      { result = [:SignatureFull, val[0]] }
  ;
# Bold or italic spans, empty or with contents, as a FormattedAST whose
# formatting is :Bold or :Italic.
formatted_element:
    BOLD_START BOLD_END
      {
        node = FormattedAST.new(@ast_index, @ast_length)
        node.formatting = :Bold
        result = node
      }
  | ITALIC_START ITALIC_END
      {
        node = FormattedAST.new(@ast_index, @ast_length)
        node.formatting = :Italic
        result = node
      }
  | BOLD_START repeated_contents BOLD_END
      {
        result = FormattedAST.new(@ast_index, @ast_length)
        result.formatting = :Bold
        result.children += val[1]
      }
  | ITALIC_START repeated_contents ITALIC_END
      {
        result = FormattedAST.new(@ast_index, @ast_length)
        result.formatting = :Italic
        result.children += val[1]
      }
  ;
# A bulleted list with at least one item, as a ListAST of type :Bulleted.
bulleted_list: UL_START list_item list_contents UL_END
      {
        first_item, more_items = val[1], val[2]
        node = ListAST.new(@ast_index, @ast_length)
        node.list_type = :Bulleted
        node.children << first_item
        node.children += more_items
        result = node
      }
  ;
# A numbered list with at least one item, as a ListAST of type :Numbered.
numbered_list: OL_START list_item list_contents OL_END
      {
        first_item, more_items = val[1], val[2]
        node = ListAST.new(@ast_index, @ast_length)
        node.list_type = :Numbered
        node.children << first_item
        node.children += more_items
        result = node
      }
  ;
# Remaining items of a bulleted or numbered list, as an array (empty when
# there are no more items).
list_contents:
# Embedded (mid-rule) action: it takes the val[0] slot of this alternative,
# which is why list_item is val[1] and the trailing list_contents is val[2].
{ result = [] }
list_item list_contents
{
# NOTE(review): result is presumably pre-set by Racc to val[0], i.e. the
# array created by the embedded action above -- confirm against Racc docs.
result << val[1]
result += val[2]
}
|
# No more items.
{ result = [] }
;
# A single list item, empty or with contents, as a ListItemAST.
list_item:
    LI_START LI_END
      {
        item = ListItemAST.new(@ast_index, @ast_length)
        result = item
      }
  | LI_START repeated_contents LI_END
      {
        result = ListItemAST.new(@ast_index, @ast_length)
        result.children += val[1]
      }
  ;
# A definition list, as a plain array: an optional leading term followed by
# its definitions. The contents rule wraps this array in a ListAST.
dictionary_list:
    DL_START dictionary_term dictionary_contents DL_END
      {
        result = [val[1]] + val[2]
      }
  | DL_START dictionary_contents DL_END
      {
        definitions = val[1]
        result = definitions
      }
  ;
# The term of a definition list, empty or with contents, as a ListTermAST.
dictionary_term:
    DT_START DT_END
      {
        term = ListTermAST.new(@ast_index, @ast_length)
        result = term
      }
  | DT_START repeated_contents DT_END
      {
        result = ListTermAST.new(@ast_index, @ast_length)
        result.children += val[1]
      }
# Zero or more definitions of a definition list, as an array.
dictionary_contents:
    dictionary_definition dictionary_contents
      {
        definition, rest = val[0], val[1]
        result = rest ? [definition] + rest : [definition]
      }
  |
      {
        result = []
      }
# A single definition, empty or with contents, as a ListDefinitionAST.
dictionary_definition:
    DD_START DD_END
      {
        definition = ListDefinitionAST.new(@ast_index, @ast_length)
        result = definition
      }
  | DD_START repeated_contents DD_END
      {
        result = ListDefinitionAST.new(@ast_index, @ast_length)
        result.children += val[1]
      }
# Preformatted text as a PreformattedAST. The PREINDENT form additionally
# sets the indented flag on the node.
preformatted: PRE_START repeated_contents PRE_END
      {
        result = PreformattedAST.new(@ast_index, @ast_length)
        result.children += val[1]
      }
  | PREINDENT_START repeated_contents PREINDENT_END
      {
        result = PreformattedAST.new(@ast_index, @ast_length)
        result.indented = true
        result.children += val[1]
      }
  ;
# A section heading as a SectionAST. Its level is the length of the
# SECTION_START token value (presumably the heading marker -- confirm against
# the lexer).
section: SECTION_START repeated_contents SECTION_END
      {
        # The former leading assignment of an array to result was a dead
        # store, overwritten below, and has been removed.
        s = SectionAST.new(@ast_index, @ast_length)
        s.children = val[1]
        s.level = val[0].length
        result = s
      }
  ;
# A template invocation as a TemplateAST named by the TEXT token. Children
# are only assigned when there is at least one parameter.
template: TEMPLATE_START TEXT template_parameters TEMPLATE_END
      {
        params = val[2]
        node = TemplateAST.new(@ast_index, @ast_length)
        node.template_name = val[1]
        node.children = params unless params.nil? || params.empty?
        result = node
      }
  ;
# Parameters of a template as an array of TemplateParameterAST nodes (nil
# when there are none). A parameter is either a TEXT value or a nested
# template, which becomes the child of the parameter node.
template_parameters:
      {
        result = nil
      }
  | INTLINKSEP TEXT template_parameters
      {
        param = TemplateParameterAST.new(@ast_index, @ast_length)
        param.parameter_value = val[1]
        rest = val[2]
        result = rest ? [param] + rest : [param]
      }
  | INTLINKSEP template template_parameters
      {
        param = TemplateParameterAST.new(@ast_index, @ast_length)
        param.children << val[1]
        rest = val[2]
        result = rest ? [param] + rest : [param]
      }
  ;
end
---- header ----
require 'mediacloth/mediawikiast'
---- inner ----
# Lexer that supplies tokens: parse calls lexer.tokenize(input) and
# next_token reads one token at a time from @lexer.lex.
attr_accessor :lexer
# Creates the parser with no parsed nodes, an empty span context and a total
# length of zero, then defers to the superclass initializer.
def initialize
  @nodes, @context = [], []
  @wiki_ast_length = 0
  super
end
# Tokenizes the input string and parses it.
#
# The per-parse bookkeeping (@context and @wiki_ast_length) is reset first, so
# a parser instance can be reused and a previously failed parse cannot leave
# stale span context behind. Without the reset, the root node of a second
# parse would be given the accumulated length of every earlier input.
#
# input - the text handed to lexer.tokenize.
#
# Returns the last node in @nodes (the WikiAST pushed by the wiki rule).
def parse(input)
  # NOTE(review): debug tracing is switched on unconditionally here; kept
  # as-is to preserve existing behaviour.
  @yydebug = true
  @context = []
  @wiki_ast_length = 0
  lexer.tokenize(input)
  do_parse
  @nodes.last
end
# Fetches the next token from the lexer and updates span bookkeeping.
#
# A token is read as [type, value, index, length]. A *_START token pushes its
# [index, length] onto @context. A *_END token pops the matching entry and
# sets @ast_index / @ast_length to the span from the start token to the end of
# this token. Any other token sets them to its own index and length. The
# token length is always added to @wiki_ast_length.
#
# Returns the [type, value] pair.
def next_token
  tok = @lexer.lex
  kind = tok[0].to_s.upcase
  if kind.include?("_START")
    @context.push(tok[2..3])
  elsif kind.include?("_END")
    opened_at = @context.last[0]
    @ast_index = opened_at
    @ast_length = tok[2] + tok[3] - opened_at
    @context.pop
  else
    @ast_index, @ast_length = tok[2], tok[3]
  end
  @wiki_ast_length += tok[3]
  tok[0..1]
end