ruby--ruby/lib/webrick/httputils.rb

#
# httputils.rb -- HTTPUtils Module
#
# Author: IPR -- Internet Programming with Ruby -- writers
# Copyright (c) 2000, 2001 TAKAHASHI Masayoshi, GOTOU Yuuzou
# Copyright (c) 2002 Internet Programming with Ruby writers. All rights
# reserved.
#
# $IPR: httputils.rb,v 1.34 2003/06/05 21:34:08 gotoyuzo Exp $

require 'socket'
require 'tempfile'

module WEBrick
  CR   = "\x0d"     # :nodoc:
  LF   = "\x0a"     # :nodoc:
  CRLF = "\x0d\x0a" # :nodoc:

  ##
  # HTTPUtils provides utility methods for working with the HTTP protocol.
  #
  # This module is generally used internally by WEBrick

  module HTTPUtils

    ##
    # Normalizes a request path.  Raises an exception if the path cannot be
    # normalized.

    def normalize_path(path)
      raise "abnormal path `#{path}'" if path[0] != ?/
      ret = path.dup

      ret.gsub!(%r{/+}o, '/')                    # //      => /
      while ret.sub!(%r'/\.(?:/|\Z)', '/'); end  # /.      => /
      while ret.sub!(%r'/(?!\.\./)[^/]+/\.\.(?:/|\Z)', '/'); end # /foo/.. => /foo

      raise "abnormal path `#{path}'" if %r{/\.\.(/|\Z)} =~ ret
      ret
    end
    module_function :normalize_path

    ##
    # Default mime types

    DefaultMimeTypes = {
      "ai"    => "application/postscript",
      "asc"   => "text/plain",
      "avi"   => "video/x-msvideo",
      "bin"   => "application/octet-stream",
      "bmp"   => "image/bmp",
      "class" => "application/octet-stream",
      "cer"   => "application/pkix-cert",
      "crl"   => "application/pkix-crl",
      "crt"   => "application/x-x509-ca-cert",
     #"crl"   => "application/x-pkcs7-crl",
      "css"   => "text/css",
      "dms"   => "application/octet-stream",
      "doc"   => "application/msword",
      "dvi"   => "application/x-dvi",
      "eps"   => "application/postscript",
      "etx"   => "text/x-setext",
      "exe"   => "application/octet-stream",
      "gif"   => "image/gif",
      "htm"   => "text/html",
      "html"  => "text/html",
      "jpe"   => "image/jpeg",
      "jpeg"  => "image/jpeg",
      "jpg"   => "image/jpeg",
      "js"    => "application/javascript",
      "lha"   => "application/octet-stream",
      "lzh"   => "application/octet-stream",
      "mov"   => "video/quicktime",
      "mpe"   => "video/mpeg",
      "mpeg"  => "video/mpeg",
      "mpg"   => "video/mpeg",
      "pbm"   => "image/x-portable-bitmap",
      "pdf"   => "application/pdf",
      "pgm"   => "image/x-portable-graymap",
      "png"   => "image/png",
      "pnm"   => "image/x-portable-anymap",
      "ppm"   => "image/x-portable-pixmap",
      "ppt"   => "application/vnd.ms-powerpoint",
      "ps"    => "application/postscript",
      "qt"    => "video/quicktime",
      "ras"   => "image/x-cmu-raster",
      "rb"    => "text/plain",
      "rd"    => "text/plain",
      "rtf"   => "application/rtf",
      "sgm"   => "text/sgml",
      "sgml"  => "text/sgml",
      "svg"   => "image/svg+xml",
      "tif"   => "image/tiff",
      "tiff"  => "image/tiff",
      "txt"   => "text/plain",
      "xbm"   => "image/x-xbitmap",
      "xhtml" => "text/html",
      "xls"   => "application/vnd.ms-excel",
      "xml"   => "text/xml",
      "xpm"   => "image/x-xpixmap",
      "xwd"   => "image/x-xwindowdump",
      "zip"   => "application/zip",
    }

    ##
    # Loads Apache-compatible mime.types in +file+.

    def load_mime_types(file)
      open(file){ |io|
        hash = Hash.new
        io.each{ |line|
          next if /^#/ =~ line
          line.chomp!
          mimetype, ext0 = line.split(/\s+/, 2)
          next unless ext0
          next if ext0.empty?
          ext0.split(/\s+/).each{ |ext| hash[ext] = mimetype }
        }
        hash
      }
    end
    module_function :load_mime_types

    ##
    # Returns the mime type of +filename+ from the list in +mime_tab+.  If no
    # mime type was found application/octet-stream is returned.

    def mime_type(filename, mime_tab)
      suffix1 = (/\.(\w+)$/ =~ filename && $1.downcase)
      suffix2 = (/\.(\w+)\.[\w\-]+$/ =~ filename && $1.downcase)
      mime_tab[suffix1] || mime_tab[suffix2] || "application/octet-stream"
    end
    module_function :mime_type

    ##
    # Parses an HTTP header +raw+ into a hash of header fields with an Array
    # of values.

    def parse_header(raw)
      header = Hash.new([].freeze)
      field = nil
      raw.each_line{|line|
        case line
        when /^([A-Za-z0-9!\#$%&'*+\-.^_`|~]+):\s*(.*?)\s*\z/om
          field, value = $1, $2
          field.downcase!
          header[field] = [] unless header.has_key?(field)
          header[field] << value
        when /^\s+(.*?)\s*\z/om
          value = $1
          unless field
            raise HTTPStatus::BadRequest, "bad header '#{line}'."
          end
          header[field][-1] << " " << value
        else
          raise HTTPStatus::BadRequest, "bad header '#{line}'."
        end
      }
      header.each{|key, values|
        values.each{|value|
          value.strip!
          value.gsub!(/\s+/, " ")
        }
      }
      header
    end
    module_function :parse_header

    ##
    # Splits a header value +str+ according to HTTP specification.

    def split_header_value(str)
      str.scan(%r'\G((?:"(?:\\.|[^"])+?"|[^",]+)+)
                    (?:,\s*|\Z)'xn).flatten
    end
    module_function :split_header_value

    ##
    # Parses a Range header value +ranges_specifier+

    def parse_range_header(ranges_specifier)
      if /^bytes=(.*)/ =~ ranges_specifier
        byte_range_set = split_header_value($1)
        byte_range_set.collect{|range_spec|
          case range_spec
          when /^(\d+)-(\d+)/ then $1.to_i .. $2.to_i
          when /^(\d+)-/      then $1.to_i .. -1
          when /^-(\d+)/      then -($1.to_i) .. -1
          else return nil
          end
        }
      end
    end
    module_function :parse_range_header

    ##
    # Parses q values in +value+ as used in Accept headers.

    def parse_qvalues(value)
      tmp = []
      if value
        parts = value.split(/,\s*/)
        parts.each {|part|
          if m = %r{^([^\s,]+?)(?:;\s*q=(\d+(?:\.\d+)?))?$}.match(part)
            val = m[1]
            q = (m[2] or 1).to_f
            tmp.push([val, q])
          end
        }
        tmp = tmp.sort_by{|val, q| -q}
        tmp.collect!{|val, q| val}
      end
      return tmp
    end
    module_function :parse_qvalues

    ##
    # Removes quotes and escapes from +str+

    def dequote(str)
      ret = (/\A"(.*)"\Z/ =~ str) ? $1 : str.dup
      ret.gsub!(/\\(.)/, "\\1")
      ret
    end
    module_function :dequote

    ##
    # Quotes and escapes quotes in +str+

    def quote(str)
      '"' << str.gsub(/[\\\"]/o, "\\\1") << '"'
    end
    module_function :quote

    ##
    # Stores multipart form data.  FormData objects are created when
    # WEBrick::HTTPUtils.parse_form_data is called.

    class FormData < String
      EmptyRawHeader = [].freeze # :nodoc:
      EmptyHeader = {}.freeze # :nodoc:

      ##
      # The name of the form data part

      attr_accessor :name

      ##
      # The filename of the form data part

      attr_accessor :filename

      attr_accessor :next_data # :nodoc:
      protected :next_data

      ##
      # Creates a new FormData object.
      #
      # +args+ is an Array of form data entries.  One FormData will be created
      # for each entry.
      #
      # This is called by WEBrick::HTTPUtils.parse_form_data for you

      def initialize(*args)
        @name = @filename = @next_data = nil
        if args.empty?
          @raw_header = []
          @header = nil
          super("")
        else
          @raw_header = EmptyRawHeader
          @header = EmptyHeader
          super(args.shift)
          unless args.empty?
            @next_data = self.class.new(*args)
          end
        end
      end

      ##
      # Retrieves the header at the first entry in +key+

      def [](*key)
        begin
          @header[key[0].downcase].join(", ")
        rescue StandardError, NameError
          super
        end
      end

      ##
      # Adds +str+ to this FormData which may be the body, a header or a
      # header entry.
      #
      # This is called by WEBrick::HTTPUtils.parse_form_data for you

      def <<(str)
        if @header
          super
        elsif str == CRLF
          @header = HTTPUtils::parse_header(@raw_header.join)
          if cd = self['content-disposition']
            if /\s+name="(.*?)"/ =~ cd then @name = $1 end
            if /\s+filename="(.*?)"/ =~ cd then @filename = $1 end
          end
        else
          @raw_header << str
        end
        self
      end

      ##
      # Adds +data+ at the end of the chain of entries
      #
      # This is called by WEBrick::HTTPUtils.parse_form_data for you.

      def append_data(data)
        tmp = self
        while tmp
          unless tmp.next_data
            tmp.next_data = data
            break
          end
          tmp = tmp.next_data
        end
        self
      end

      ##
      # Yields each entry in this FormData

      def each_data
        tmp = self
        while tmp
          next_data = tmp.next_data
          yield(tmp)
          tmp = next_data
        end
      end

      ##
      # Returns all the FormData as an Array

      def list
        ret = []
        each_data{|data|
          ret << data.to_s
        }
        ret
      end

      ##
      # A FormData will behave like an Array

      alias :to_ary :list

      ##
      # This FormData's body

      def to_s
        String.new(self)
      end
    end

    ##
    # Parses the query component of a URI in +str+

    def parse_query(str)
      query = Hash.new
      if str
        str.split(/[&;]/).each{|x|
          next if x.empty?
          key, val = x.split(/=/,2)
          key = unescape_form(key)
          val = unescape_form(val.to_s)
          val = FormData.new(val)
          val.name = key
          if query.has_key?(key)
            query[key].append_data(val)
            next
          end
          query[key] = val
        }
      end
      query
    end
    module_function :parse_query

    ##
    # Parses form data in +io+ with the given +boundary+

    def parse_form_data(io, boundary)
      boundary_regexp = /\A--#{Regexp.quote(boundary)}(--)?#{CRLF}\z/
      form_data = Hash.new
      return form_data unless io
      data = nil
      io.each_line{|line|
        if boundary_regexp =~ line
          if data
            data.chop!
            key = data.name
            if form_data.has_key?(key)
              form_data[key].append_data(data)
            else
              form_data[key] = data
            end
          end
          data = FormData.new
          next
        else
          if data
            data << line
          end
        end
      }
      return form_data
    end
    module_function :parse_form_data

    #####

    reserved = ';/?:@&=+$,'
    num      = '0123456789'
    lowalpha = 'abcdefghijklmnopqrstuvwxyz'
    upalpha  = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    mark     = '-_.!~*\'()'
    unreserved = num + lowalpha + upalpha + mark
    control  = (0x0..0x1f).collect{|c| c.chr }.join + "\x7f"
    space    = " "
    delims   = '<>#%"'
    unwise   = '{}|\\^[]`'
    nonascii = (0x80..0xff).collect{|c| c.chr }.join

    module_function

    # :stopdoc:

    def _make_regex(str) /([#{Regexp.escape(str)}])/n end
    def _make_regex!(str) /([^#{Regexp.escape(str)}])/n end
    def _escape(str, regex)
      str = str.b
      str.gsub!(regex) {"%%%02X" % $1.ord}
      # %-escaped string should contain US-ASCII only
      str.force_encoding(Encoding::US_ASCII)
    end
    def _unescape(str, regex)
      str = str.b
      str.gsub!(regex) {$1.hex.chr}
      # encoding of %-unescaped string is unknown
      str
    end

    UNESCAPED = _make_regex(control+space+delims+unwise+nonascii)
    UNESCAPED_FORM = _make_regex(reserved+control+delims+unwise+nonascii)
    NONASCII  = _make_regex(nonascii)
    ESCAPED   = /%([0-9a-fA-F]{2})/
    UNESCAPED_PCHAR = _make_regex!(unreserved+":@&=+$,")

    # :startdoc:

    ##
    # Escapes HTTP reserved and unwise characters in +str+

    def escape(str)
      _escape(str, UNESCAPED)
    end

    ##
    # Unescapes HTTP reserved and unwise characters in +str+

    def unescape(str)
      _unescape(str, ESCAPED)
    end

    ##
    # Escapes form reserved characters in +str+

    def escape_form(str)
      ret = _escape(str, UNESCAPED_FORM)
      ret.gsub!(/ /, "+")
      ret
    end

    ##
    # Unescapes form reserved characters in +str+

    def unescape_form(str)
      _unescape(str.gsub(/\+/, " "), ESCAPED)
    end

    ##
    # Escapes path +str+

    def escape_path(str)
      result = ""
      str.scan(%r{/([^/]*)}).each{|i|
        result << "/" << _escape(i[0], UNESCAPED_PCHAR)
      }
      return result
    end

    ##
    # Escapes 8 bit characters in +str+

    def escape8bit(str)
      _escape(str, NONASCII)
    end
  end
end