mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
37c266aa57
While the stripping of header values is required by RFC 2616 4.2 and RFC 7230 3.2.4, the squishing is not and can break things, such as when one header contains an HMAC of another header. Fixes Ruby Bug 7021. https://github.com/ruby/webrick/commit/8b96088a86
511 lines
13 KiB
Ruby
511 lines
13 KiB
Ruby
# frozen_string_literal: false
|
|
#
|
|
# httputils.rb -- HTTPUtils Module
|
|
#
|
|
# Author: IPR -- Internet Programming with Ruby -- writers
|
|
# Copyright (c) 2000, 2001 TAKAHASHI Masayoshi, GOTOU Yuuzou
|
|
# Copyright (c) 2002 Internet Programming with Ruby writers. All rights
|
|
# reserved.
|
|
#
|
|
# $IPR: httputils.rb,v 1.34 2003/06/05 21:34:08 gotoyuzo Exp $
|
|
|
|
require 'socket'
|
|
require 'tempfile'
|
|
|
|
module WEBrick
|
|
# Carriage return, line feed, and the two-byte CR LF sequence.
CR   = "\r"   # :nodoc:
LF   = "\n"   # :nodoc:
CRLF = "\r\n" # :nodoc:
|
|
|
|
##
|
|
# HTTPUtils provides utility methods for working with the HTTP protocol.
|
|
#
|
|
# This module is generally used internally by WEBrick
|
|
|
|
module HTTPUtils
|
|
|
|
##
|
|
# Normalizes a request path. Raises an exception if the path cannot be
|
|
# normalized.
|
|
|
|
# Returns a normalized copy of the request path +path+; +path+ itself is
# not modified.
#
# Runs of slashes are collapsed, "." segments are dropped and "name/.."
# pairs are folded away.
#
# Raises RuntimeError when +path+ does not start with "/" or when a ".."
# segment is still present after folding (i.e. it would climb above the
# root).
def normalize_path(path)
  unless path[0] == ?/
    raise "abnormal path `#{path}'"
  end

  normalized = path.dup
  normalized.gsub!(%r{/+}o, '/')                                        # // => /
  nil while normalized.sub!(%r'/\.(?:/|\Z)', '/')                       # /. => /
  nil while normalized.sub!(%r'/(?!\.\./)[^/]+/\.\.(?:/|\Z)', '/')      # /foo/.. => /

  # Any ".." left at this point has no preceding segment to cancel.
  if %r{/\.\.(/|\Z)} =~ normalized
    raise "abnormal path `#{path}'"
  end
  normalized
end
|
|
module_function :normalize_path
|
|
|
|
##
|
|
# Default mime types
|
|
|
|
# Maps a lower-case file extension (without the dot) to its MIME type.
# The table is intentionally left unfrozen.
DefaultMimeTypes = {
  "ai"    => "application/postscript",
  "asc"   => "text/plain",
  "avi"   => "video/x-msvideo",
  "bin"   => "application/octet-stream",
  "bmp"   => "image/bmp",
  "class" => "application/octet-stream",
  "cer"   => "application/pkix-cert",
  "crl"   => "application/pkix-crl",
  "crt"   => "application/x-x509-ca-cert",
  # Alternative mapping for "crl", disabled:
  # "crl" => "application/x-pkcs7-crl",
  "css"   => "text/css",
  "dms"   => "application/octet-stream",
  "doc"   => "application/msword",
  "dvi"   => "application/x-dvi",
  "eps"   => "application/postscript",
  "etx"   => "text/x-setext",
  "exe"   => "application/octet-stream",
  "gif"   => "image/gif",
  "htm"   => "text/html",
  "html"  => "text/html",
  "jpe"   => "image/jpeg",
  "jpeg"  => "image/jpeg",
  "jpg"   => "image/jpeg",
  "js"    => "application/javascript",
  "json"  => "application/json",
  "lha"   => "application/octet-stream",
  "lzh"   => "application/octet-stream",
  "mov"   => "video/quicktime",
  "mpe"   => "video/mpeg",
  "mpeg"  => "video/mpeg",
  "mpg"   => "video/mpeg",
  "pbm"   => "image/x-portable-bitmap",
  "pdf"   => "application/pdf",
  "pgm"   => "image/x-portable-graymap",
  "png"   => "image/png",
  "pnm"   => "image/x-portable-anymap",
  "ppm"   => "image/x-portable-pixmap",
  "ppt"   => "application/vnd.ms-powerpoint",
  "ps"    => "application/postscript",
  "qt"    => "video/quicktime",
  "ras"   => "image/x-cmu-raster",
  "rb"    => "text/plain",
  "rd"    => "text/plain",
  "rtf"   => "application/rtf",
  "sgm"   => "text/sgml",
  "sgml"  => "text/sgml",
  "svg"   => "image/svg+xml",
  "tif"   => "image/tiff",
  "tiff"  => "image/tiff",
  "txt"   => "text/plain",
  "wasm"  => "application/wasm",
  "xbm"   => "image/x-xbitmap",
  "xhtml" => "text/html",
  "xls"   => "application/vnd.ms-excel",
  "xml"   => "text/xml",
  "xpm"   => "image/x-xpixmap",
  "xwd"   => "image/x-xwindowdump",
  "zip"   => "application/zip",
}
|
|
|
|
##
|
|
# Loads Apache-compatible mime.types in +file+.
|
|
|
|
# Reads an Apache-style mime.types +file+ and returns a Hash mapping each
# extension to its MIME type.
#
# Lines starting with "#" are skipped, as are lines that list a type
# without any extension. When an extension appears more than once the
# last occurrence wins.
#
# NOTE(review): +file+ is handed to Kernel#open, so a value such as
# "| command" may spawn a subprocess. This is kept on purpose because
# existing callers may rely on it; never pass untrusted input here.
def load_mime_types(file)
  open(file) do |io|
    io.each_with_object({}) do |line, table|
      next if line =~ /^#/

      mimetype, extensions = line.chomp.split(/\s+/, 2)
      next if extensions.nil? || extensions.empty?

      extensions.split(/\s+/).each { |ext| table[ext] = mimetype }
    end
  end
end
|
|
module_function :load_mime_types
|
|
|
|
##
|
|
# Returns the mime type of +filename+ from the list in +mime_tab+. If no
|
|
# mime type was found application/octet-stream is returned.
|
|
|
|
# Looks up the MIME type of +filename+ in +mime_tab+.
#
# The last extension is tried first (e.g. "html" in "index.html"), then
# the one before it (e.g. "html" in "index.html.ja"). Extensions are
# lower-cased before the lookup. Returns "application/octet-stream" when
# neither is present in +mime_tab+.
def mime_type(filename, mime_tab)
  last_ext = /\.(\w+)$/.match(filename)
  inner_ext = /\.(\w+)\.[\w\-]+$/.match(filename)
  first_key, second_key = [last_ext, inner_ext].map { |m| m && m[1].downcase }

  mime_tab[first_key] || mime_tab[second_key] || "application/octet-stream"
end
|
|
module_function :mime_type
|
|
|
|
##
|
|
# Parses an HTTP header +raw+ into a hash of header fields with an Array
|
|
# of values.
|
|
|
|
# Parses the raw header text +raw+ into a Hash that maps each lower-cased
# field name to an Array of its values, in order of appearance.
#
# A line starting with whitespace continues the most recent value and is
# joined to it with a single space. Values are stripped of surrounding
# whitespace; whitespace inside a value is left as it is.
#
# Looking up a field that was not present returns a frozen empty Array.
#
# Raises HTTPStatus::BadRequest for a continuation line that precedes any
# field and for a line that is neither a field nor a continuation.
def parse_header(raw)
  header = Hash.new([].freeze)
  field = nil

  raw.each_line do |line|
    if (m = /^([A-Za-z0-9!\#$%&'*+\-.^_`|~]+):\s*(.*?)\s*\z/om.match(line))
      field = m[1].downcase
      header[field] = [] unless header.has_key?(field)
      header[field] << m[2]
    elsif (m = /^\s+(.*?)\s*\z/om.match(line))
      raise HTTPStatus::BadRequest, "bad header '#{line}'." unless field

      header[field][-1] << " " << m[1]
    else
      raise HTTPStatus::BadRequest, "bad header '#{line}'."
    end
  end

  header.each_value { |values| values.each(&:strip!) }
  header
end
|
|
module_function :parse_header
|
|
|
|
##
|
|
# Splits a header value +str+ according to HTTP specification.
|
|
|
|
# Splits the header value +str+ at commas and returns the elements as an
# Array of Strings. Commas inside double-quoted sections do not split,
# and the quotes are kept in the returned elements.
def split_header_value(str)
  element = %r'\G((?:"(?:\\.|[^"])+?"|[^",]+)+)(?:,\s*|\Z)'xn
  str.scan(element).map(&:first)
end
|
|
module_function :split_header_value
|
|
|
|
##
|
|
# Parses a Range header value +ranges_specifier+
|
|
|
|
# Parses the Range header value +ranges_specifier+.
#
# Returns an Array with one Range per byte-range-spec:
#
#   "N-M" => N..M
#   "N-"  => N..-1
#   "-N"  => -N..-1
#
# Returns nil when the value does not use the "bytes=" unit or when any
# element matches none of the forms above.
def parse_range_header(ranges_specifier)
  return nil unless /^bytes=(.*)/ =~ ranges_specifier

  split_header_value($1).map do |range_spec|
    if (m = /^(\d+)-(\d+)/.match(range_spec))
      m[1].to_i..m[2].to_i
    elsif (m = /^(\d+)-/.match(range_spec))
      m[1].to_i..-1
    elsif (m = /^-(\d+)/.match(range_spec))
      -(m[1].to_i)..-1
    else
      # One malformed element invalidates the whole header.
      return nil
    end
  end
end
|
|
module_function :parse_range_header
|
|
|
|
##
|
|
# Parses q values in +value+ as used in Accept headers.
|
|
|
|
# Parses a comma-separated list with optional quality values, as used in
# Accept-style headers, and returns the items ordered by descending q.
#
# +value+ is the raw header value, or nil. An item without an explicit
# ";q=" has q = 1. Items that do not match the expected
# "token[;q=number]" form are dropped. Returns an empty Array for nil.
#
# Items with equal q keep the order in which they appear in +value+.
# Enumerable#sort_by alone is not guaranteed to be stable, so the
# original position is used as an explicit tie-breaker.
def parse_qvalues(value)
  return [] unless value

  weighted = []
  value.split(/,\s*/).each do |part|
    m = %r{^([^\s,]+?)(?:;\s*q=(\d+(?:\.\d+)?))?$}.match(part)
    next unless m

    weighted << [m[1], (m[2] || 1).to_f]
  end

  ranked = weighted.each_with_index.sort_by { |(_item, q), position| [-q, position] }
  ranked.map { |(item, _q), _position| item }
end
|
|
module_function :parse_qvalues
|
|
|
|
##
|
|
# Removes quotes and escapes from +str+
|
|
|
|
# Returns +str+ without its enclosing double quotes (when it has them)
# and with every backslash escape replaced by the escaped character.
# The result is always a new String.
def dequote(str)
  quoted = /\A"(.*)"\Z/.match(str)
  unquoted = quoted ? quoted[1] : str.dup
  unquoted.gsub!(/\\(.)/, "\\1")
  unquoted
end
|
|
module_function :dequote
|
|
|
|
##
|
|
# Quotes and escapes quotes in +str+
|
|
|
|
# Returns +str+ wrapped in double quotes, with every backslash and double
# quote inside it preceded by a backslash.
#
# The previous replacement string, "\\\1", was a backslash followed by
# the control character \x01 (an octal escape), so quotes and backslashes
# were replaced by that pair instead of being escaped. A block is used
# here so that the matched character is emitted literally.
def quote(str)
  escaped = str.gsub(/[\\"]/) { |char| "\\#{char}" }
  "\"#{escaped}\""
end
|
|
module_function :quote
|
|
|
|
##
|
|
# Stores multipart form data. FormData objects are created when
|
|
# WEBrick::HTTPUtils.parse_form_data is called.
|
|
|
|
# A String holding one form or query value, plus the name, filename and
# headers of the part it came from. Values that share a name are chained
# through +next_data+, forming a singly linked list that starts at the
# first value.
class FormData < String
  EmptyRawHeader = [].freeze # :nodoc:
  EmptyHeader = {}.freeze # :nodoc:

  ##
  # The name of the form data part

  attr_accessor :name

  ##
  # The filename of the form data part

  attr_accessor :filename

  attr_accessor :next_data # :nodoc:
  protected :next_data

  ##
  # Creates a new FormData object.
  #
  # Without arguments the object starts empty and expects its header
  # lines to be fed in through #<<.
  #
  # With arguments, the first one becomes this object's value and one
  # chained FormData is created for each remaining argument.

  def initialize(*args)
    @name = nil
    @filename = nil
    @next_data = nil

    if args.empty?
      @raw_header = []
      @header = nil
      super("")
      return
    end

    @raw_header = EmptyRawHeader
    @header = EmptyHeader
    first, *rest = args
    super(first)
    @next_data = self.class.new(*rest) unless rest.empty?
  end

  ##
  # Returns the values of the header named by the first entry in +key+,
  # joined with ", ". When that lookup raises for any reason (no such
  # header, non-String key, headers not parsed yet) the call is forwarded
  # to String#[] with the original arguments.

  def [](*key)
    @header[key[0].downcase].join(", ")
  rescue StandardError, NameError
    super
  end

  ##
  # Adds +str+ to this FormData.
  #
  # Once the headers have been parsed +str+ is appended to the body.
  # Before that, a bare CRLF ends the header section: the collected
  # lines are parsed and +name+ and +filename+ are taken from the
  # content-disposition header. Any other +str+ is stored as a raw
  # header line.
  #
  # Always returns self.

  def <<(str)
    if @header
      super
    elsif str == CRLF
      @header = HTTPUtils::parse_header(@raw_header.join)
      disposition = self['content-disposition']
      if disposition
        name_match = /\s+name="(.*?)"/.match(disposition)
        @name = name_match[1] if name_match
        filename_match = /\s+filename="(.*?)"/.match(disposition)
        @filename = filename_match[1] if filename_match
      end
    else
      @raw_header << str
    end
    self
  end

  ##
  # Links +data+ after the last entry of the chain and returns self.

  def append_data(data)
    tail = self
    tail = tail.next_data while tail.next_data
    tail.next_data = data
    self
  end

  ##
  # Yields each entry of the chain, starting with self.

  def each_data
    node = self
    until node.nil?
      # Read the successor first so the block may relink the entry.
      successor = node.next_data
      yield(node)
      node = successor
    end
  end

  ##
  # Returns the bodies of all chained entries as an Array of Strings.

  def list
    bodies = []
    each_data { |entry| bodies.push(entry.to_s) }
    bodies
  end

  ##
  # A FormData will behave like an Array

  alias :to_ary :list

  ##
  # This FormData's body as a plain String

  def to_s
    String.new(self)
  end
end
|
|
|
|
##
|
|
# Parses the query component of a URI in +str+
|
|
|
|
# Parses the query component +str+ into a Hash of FormData keyed by the
# unescaped parameter name.
#
# Pairs are separated by "&" or ";" and empty pairs are ignored. A pair
# without "=" gets an empty value. When a name occurs more than once the
# later values are chained onto the first one with append_data.
#
# Returns an empty Hash when +str+ is nil.
def parse_query(str)
  query = Hash.new
  return query unless str

  str.split(/[&;]/).each do |pair|
    next if pair.empty?

    raw_key, raw_val = pair.split(/=/, 2)
    key = unescape_form(raw_key)
    entry = FormData.new(unescape_form(raw_val.to_s))
    entry.name = key

    if query.has_key?(key)
      query[key].append_data(entry)
    else
      query[key] = entry
    end
  end

  query
end
|
|
module_function :parse_query
|
|
|
|
##
|
|
# Parses form data in +io+ with the given +boundary+
|
|
|
|
# Parses multipart form data read line by line from +io+, using
# +boundary+ as the part delimiter, and returns a Hash of FormData keyed
# by part name.
#
# Lines before the first boundary are ignored. Each boundary line
# (plain or closing, terminated by CRLF) finishes the current part and
# starts a new one; the line break preceding the boundary is removed
# from the finished part. Parts sharing a name are chained with
# append_data.
#
# Returns an empty Hash when +io+ is nil.
def parse_form_data(io, boundary)
  boundary_regexp = /\A--#{Regexp.quote(boundary)}(--)?#{CRLF}\z/
  form_data = Hash.new
  return form_data unless io

  current = nil
  io.each_line do |line|
    unless boundary_regexp =~ line
      current << line if current
      next
    end

    if current
      current.chop!
      key = current.name
      if form_data.has_key?(key)
        form_data[key].append_data(current)
      else
        form_data[key] = current
      end
    end
    current = FormData.new
  end

  form_data
end
|
|
module_function :parse_form_data
|
|
|
|
#####
|
|
|
|
# Character classes from which the escaping patterns of this module are
# built. Each local is a String of the characters belonging to the class.
reserved   = ';/?:@&=+$,'
num        = '0123456789'
lowalpha   = 'abcdefghijklmnopqrstuvwxyz'
upalpha    = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
mark       = "-_.!~*'()"
unreserved = num + lowalpha + upalpha + mark
control    = (0x00..0x1f).map(&:chr).join + "\x7f"   # bytes 0x00-0x1f and 0x7f
space      = " "
delims     = '<>#%"'
unwise     = '{}|\\^[]`'
nonascii   = (0x80..0xff).map(&:chr).join            # every byte with the high bit set
|
|
|
|
module_function
|
|
|
|
# :stopdoc:
|
|
|
|
# Builds a Regexp (with the /n option) that captures any single character
# contained in +str+.
def _make_regex(str)
  members = Regexp.escape(str)
  /([#{members}])/n
end
|
|
# Builds a Regexp (with the /n option) that captures any single character
# NOT contained in +str+.
def _make_regex!(str)
  members = Regexp.escape(str)
  /([^#{members}])/n
end
|
|
# Returns a copy of +str+ in which every byte captured by group 1 of
# +regex+ is replaced by its "%XX" form (upper-case hex). Matching is
# done on the binary form of +str+, so multi-byte characters are encoded
# byte by byte. The result is tagged US-ASCII.
def _escape(str, regex)
  encoded = str.b.gsub(regex) { format("%%%02X", $1.ord) }
  # %-escaped string should contain US-ASCII only
  encoded.force_encoding(Encoding::US_ASCII)
end
|
|
# Returns a binary copy of +str+ in which every match of +regex+ is
# replaced by the byte whose hex value is captured in group 1.
def _unescape(str, regex)
  # encoding of %-unescaped string is unknown, so the result stays binary
  str.b.gsub(regex) { $1.hex.chr }
end
|
|
|
|
# Patterns used by the escaping and unescaping methods of this module.
# Apart from ESCAPED, each one captures a single character that must be
# percent-encoded in the corresponding context.
UNESCAPED       = _make_regex(control + space + delims + unwise + nonascii)
UNESCAPED_FORM  = _make_regex(reserved + control + delims + unwise + nonascii)
NONASCII        = _make_regex(nonascii)
# Matches one "%XX" escape and captures its two hex digits.
ESCAPED         = /%([0-9a-fA-F]{2})/
# Captures any character outside the unreserved set and ":@&=+$,".
UNESCAPED_PCHAR = _make_regex!(unreserved + ":@&=+$,")
|
|
|
|
# :startdoc:
|
|
|
|
##
|
|
# Escapes HTTP reserved and unwise characters in +str+
|
|
|
|
# Returns a percent-encoded copy of +str+, encoding the characters
# matched by UNESCAPED.
def escape(str)
  pattern = UNESCAPED
  _escape(str, pattern)
end
|
|
|
|
##
|
|
# Unescapes HTTP reserved and unwise characters in +str+
|
|
|
|
# Returns a copy of +str+ with every "%XX" escape (as matched by ESCAPED)
# replaced by the byte it encodes.
def unescape(str)
  pattern = ESCAPED
  _unescape(str, pattern)
end
|
|
|
|
##
|
|
# Escapes form reserved characters in +str+
|
|
|
|
# Returns a form-encoded copy of +str+: the characters matched by
# UNESCAPED_FORM are percent-encoded first, then each remaining space is
# turned into "+".
def escape_form(str)
  _escape(str, UNESCAPED_FORM).tr(" ", "+")
end
|
|
|
|
##
|
|
# Unescapes form reserved characters in +str+
|
|
|
|
# Returns a decoded copy of the form-encoded +str+: each "+" becomes a
# space, then every "%XX" escape is replaced by the byte it encodes.
def unescape_form(str)
  plus_decoded = str.gsub(/\+/, " ")
  _unescape(plus_decoded, ESCAPED)
end
|
|
|
|
##
|
|
# Escapes path +str+
|
|
|
|
# Returns a copy of the path +str+ with each "/"-introduced segment
# percent-encoded using UNESCAPED_PCHAR; the slashes themselves are kept.
# Text before the first "/" is not part of any segment and is therefore
# not included in the result.
def escape_path(str)
  escaped = +""
  str.scan(%r{/([^/]*)}) do |(segment)|
    escaped << "/" << _escape(segment, UNESCAPED_PCHAR)
  end
  escaped
end
|
|
|
|
##
|
|
# Escapes 8 bit characters in +str+
|
|
|
|
# Returns a copy of +str+ in which only the bytes matched by NONASCII are
# percent-encoded.
def escape8bit(str)
  pattern = NONASCII
  _escape(str, pattern)
end
|
|
end
|
|
end
|