# Utility class methods on CGI: URL and HTML escaping/unescaping, RFC 1123
# date formatting, and HTML pretty-printing.
class CGI
  # Default encoding applied to the result of CGI::unescape. Left untouched
  # when it has already been defined before this file is loaded.
  @@accept_charset = "UTF-8" unless defined?(@@accept_charset)

  # URL-encode a string.
  #
  # Every byte other than ASCII letters, digits, '_', '.', '-' and space is
  # replaced by an upper-case %XX escape; spaces become '+'. The result
  # carries the encoding of +string+.
  #
  #   url_encoded_string = CGI::escape("'Stop!' said Fred")
  #      # => "%27Stop%21%27+said+Fred"
  def CGI::escape(string)
    encoding = string.encoding
    # Work on a binary copy so multibyte characters are escaped byte by byte.
    escaped = string.dup.force_encoding(Encoding::ASCII_8BIT).gsub(/([^ a-zA-Z0-9_.-]+)/) do |run|
      '%' + run.unpack('H2' * run.bytesize).join('%').upcase
    end
    escaped.tr(' ', '+').force_encoding(encoding)
  end

  # URL-decode a string with encoding(optional).
  #
  # '+' becomes a space and every run of %XX escapes is turned back into raw
  # bytes. The result is tagged with +encoding+; when the decoded bytes are
  # not valid in that encoding, the encoding of +string+ is used instead.
  #
  #   string = CGI::unescape("%27Stop%21%27+said+Fred")
  #      # => "'Stop!' said Fred"
  def CGI::unescape(string, encoding = @@accept_charset)
    decoded = string.tr('+', ' ').force_encoding(Encoding::ASCII_8BIT).gsub(/((?:%[0-9a-fA-F]{2})+)/) do |run|
      [run.delete('%')].pack('H*')
    end
    decoded.force_encoding(encoding)
    decoded.valid_encoding? ? decoded : decoded.force_encoding(string.encoding)
  end

  # The set of special characters and their escaped values
  TABLE_FOR_ESCAPE_HTML__ = {
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;',
  }

  # Escape special characters in HTML, namely &\"<>
  #
  #   CGI::escapeHTML('Usage: foo "bar" <baz>')
  #      # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
  def CGI::escapeHTML(string)
    string.gsub(/[&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
  end

  # Unescape a string that has been HTML-escaped
  #
  # Handles the named references &amp; &quot; &gt; &lt; as well as decimal
  # (&#NN;) and hexadecimal (&#xHH;) character references. A numeric
  # reference that cannot be represented in the encoding of +string+ is left
  # in the output instead of raising.
  #
  #   CGI::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
  #      # => "Usage: foo \"bar\" <baz>"
  def CGI::unescapeHTML(string)
    enc = string.encoding

    # Integer#chr raises RangeError for code points that do not exist in
    # +enc+ (out of range, surrogates, ...); fall back to the given text.
    to_char = lambda do |codepoint, fallback|
      begin
        codepoint.chr(enc)
      rescue RangeError
        fallback
      end
    end

    if [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc)
      # These encodings are not ASCII-compatible, so both the pattern and the
      # replacement text have to be transcoded into +enc+.
      pattern = Regexp.new('&(amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))
      string.gsub(pattern) do
        reference = $& # captured now; the `when` regexps below overwrite $~
        case $1.encode("US-ASCII")
        when 'amp'                then '&'.encode(enc)
        when 'quot'               then '"'.encode(enc)
        when 'gt'                 then '>'.encode(enc)
        when 'lt'                 then '<'.encode(enc)
        when /\A#0*(\d+)\z/       then to_char.call($1.to_i, reference)
        when /\A#x([0-9a-f]+)\z/i then to_char.call($1.hex, reference)
        end
      end
    else
      asciicompat = Encoding.compatible?(string, "a")
      # A numeric reference is only expanded when +enc+ is UTF-8, when it is
      # ISO-8859-1 and the code point fits in one byte, or when the string is
      # ASCII-compatible and the code point is plain ASCII.
      expandable = lambda do |n|
        enc == Encoding::UTF_8 ||
          (enc == Encoding::ISO_8859_1 && n < 256) ||
          (asciicompat && n < 128)
      end

      string.gsub(/&(amp|quot|gt|lt|\#[0-9]+|\#x[0-9A-Fa-f]+);/) do
        match = $1.dup
        case match
        when 'amp'  then '&'
        when 'quot' then '"'
        when 'gt'   then '>'
        when 'lt'   then '<'
        when /\A#0*(\d+)\z/
          digits = $1
          n = digits.to_i
          untouched = "&##{digits};"
          expandable.call(n) ? to_char.call(n, untouched) : untouched
        when /\A#x([0-9a-f]+)\z/i
          digits = $1
          n = digits.hex
          untouched = "&#x#{digits};"
          expandable.call(n) ? to_char.call(n, untouched) : untouched
        else
          "&#{match};"
        end
      end
    end
  end

  # Synonym for CGI::escapeHTML(str)
  def CGI::escape_html(str)
    escapeHTML(str)
  end

  # Synonym for CGI::unescapeHTML(str)
  def CGI::unescape_html(str)
    unescapeHTML(str)
  end

  # Escape only the tags of certain HTML elements in +string+.
  #
  # Takes an element or elements or array of elements.  Each element
  # is specified by the name of the element, without angle brackets.
  # This matches both the start and the end tag of that element.
  # The attribute list of the open tag will also be escaped (for
  # instance, the double-quotes surrounding attribute values).
  #
  #   print CGI::escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
  #     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
  #
  #   print CGI::escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
  #     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
  def CGI::escapeElement(string, *elements)
    elements = elements[0] if elements[0].kind_of?(Array)
    return string if elements.empty?

    string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/i) do
      CGI::escapeHTML($&)
    end
  end

  # Undo escaping such as that done by CGI::escapeElement()
  #
  #   print CGI::unescapeElement(
  #           CGI::escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
  #     # "&lt;BR&gt;<A HREF="url"></A>"
  #
  #   print CGI::unescapeElement(
  #           CGI::escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
  #     # "&lt;BR&gt;<A HREF="url"></A>"
  def CGI::unescapeElement(string, *elements)
    elements = elements[0] if elements[0].kind_of?(Array)
    return string if elements.empty?

    # Matches the *escaped* form of the tags, i.e. "&lt;A ...&gt;".
    string.gsub(/&lt;\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?&gt;/i) do
      CGI::unescapeHTML($&)
    end
  end

  # Synonym for CGI::escapeElement(str, *elements)
  def CGI::escape_element(str, *elements)
    escapeElement(str, *elements)
  end

  # Synonym for CGI::unescapeElement(str, *elements)
  def CGI::unescape_element(str, *elements)
    unescapeElement(str, *elements)
  end

  # Abbreviated day-of-week names specified by RFC 822
  RFC822_DAYS = %w[ Sun Mon Tue Wed Thu Fri Sat ]

  # Abbreviated month names specified by RFC 822
  RFC822_MONTHS = %w[ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ]

  # Format a +Time+ object as a String using the format specified by RFC 1123.
  #
  # +time+ itself is not modified; the conversion to GMT is done on a copy.
  #
  #   CGI::rfc1123_date(Time.now)
  #     # Sat, 01 Jan 2000 00:00:00 GMT
  def CGI::rfc1123_date(time)
    # getgm returns a new UTC Time, so this also works for frozen objects.
    t = time.getgm
    format("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT",
           RFC822_DAYS[t.wday], t.day, RFC822_MONTHS[t.month - 1], t.year,
           t.hour, t.min, t.sec)
  end

  # Prettify (indent) an HTML string.
  #
  # +string+ is the HTML string to indent.  +shift+ is the indentation
  # unit to use; it defaults to two spaces.
  #
  # A closing tag that has no matching opening tag is left at its current
  # indentation level.
  #
  #   print CGI::pretty("<HTML><BODY></BODY></HTML>")
  #     # <HTML>
  #     #   <BODY>
  #     #   </BODY>
  #     # </HTML>
  #
  #   print CGI::pretty("<HTML><BODY></BODY></HTML>", "\t")
  #     # <HTML>
  #     #         <BODY>
  #     #         </BODY>
  #     # </HTML>
  #
  def CGI::pretty(string, shift = "  ")
    # Put every tag on a line of its own.
    lines = string.gsub(/(?!\A)<.*?>/m, "\n\\0").gsub(/<.*?>(?!\n)/m, "\\0\n")
    end_pos = 0
    while (end_pos = lines.index(/^<\/(\w+)/, end_pos))
      element = $1.dup
      start_pos = lines.rindex(/^\s*<#{element}/i, end_pos)
      if start_pos.nil?
        # Unmatched closing tag: step past it instead of slicing with nil.
        end_pos += 1
        next
      end
      # Indent everything between the tag pair; the "__" markers stop both
      # tags from being picked up again on later iterations.
      lines[start_pos...end_pos] = "__" + lines[start_pos...end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__"
    end
    lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1')
  end
end